//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64ExpandImm.h"
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

65 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
66 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
67
69 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
70 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
71
73 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
74 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
75
77 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
78 cl::desc("Restrict range of B instructions (DEBUG)"));
79
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
84
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
108 // Size should be preferably set in
109 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
110 // Specific cases handle instructions of variable sizes
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
165
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
176
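// Decode a conditional-branch terminator into the generic Cond encoding used
// by the branch hooks below: for Bcc, Cond holds the condition code; for
// CB(N)Z it holds {-1, opcode, register}; for TB(N)Z it holds
// {-1, opcode, register, bit number}.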
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(1).getMBB();
185 Cond.push_back(LastInst->getOperand(0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(1).getMBB();
192 Cond.push_back(MachineOperand::CreateImm(-1));
193 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
194 Cond.push_back(LastInst->getOperand(0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(2).getMBB();
201 Cond.push_back(MachineOperand::CreateImm(-1));
202 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
203 Cond.push_back(LastInst->getOperand(0));
204 Cond.push_back(LastInst->getOperand(1));
205 }
206}
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
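// AArch64 branch offsets are encoded in units of 4-byte instructions, so the
// byte displacement is divided by 4 before being checked against the signed
// displacement field (14 bits for TB[N]Z, 19 for CB[N]Z and Bcc, 26 for B).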
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
236
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(1).getMBB();
255 }
256}
257
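// Expand an unconditional branch whose target is out of range. Preferred
// strategies, in order: fall back on a linker range-extension thunk if X16 is
// free, materialize the address with ADRP+ADD into a scavenged register and
// branch indirectly, or as a last resort spill X16 around the branch (which
// is rejected for functions with a red zone).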
void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                            MachineBasicBlock &NewDestBB,
                                            MachineBasicBlock &RestoreBB,
                                            const DebugLoc &DL,
                                            int64_t BrOffset,
                                            RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
    if (!isInt<33>(BrOffset))
      report_fatal_error(
          "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
286
  RS->enterBasicBlockEnd(MBB);
  // If X16 is unused, we can rely on the linker to insert a range extension
  // thunk if NewDestBB is out of range of a single B instruction.
  constexpr Register Reg = AArch64::X16;
  if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
  Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scavenged != AArch64::NoRegister &&
      MBB.getSectionID() == MBBSectionID::ColdSectionID) {
    buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Scavenged);
304 return;
305 }
306
  // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
  // with red zones.
  AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
  if (!AFI || AFI->hasRedZone().value_or(true))
    report_fatal_error(
        "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
  BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
      .addReg(AArch64::SP, RegState::Define)
      .addReg(Reg, RegState::Define)
      .addReg(AArch64::SP)
      .addImm(16);
328}
329
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(LastOpc)) {
357 TBB = LastInst->getOperand(0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
375 while (isUncondBranchOpcode(SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
  // If we're allowed to modify and the block ends in an unconditional branch
  // which could simply fallthrough, remove the branch. (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
419 parseCondBranch(SecondLastInst, TBB, Cond);
420 FBB = LastInst->getOperand(0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
427 TBB = SecondLastInst->getOperand(0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
446
bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
                                              MachineBranchPredicate &MBP,
                                              bool AllowModify) const {
450 // For the moment, handle only a block which ends with a cb(n)zx followed by
451 // a fallthrough. Why this? Because it is a common form.
452 // TODO: Should we handle b.cc?
453
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
482
483 MBP.TrueDest = LastInst->getOperand(1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(0);
491 MBP.RHS = MachineOperand::CreateImm(0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
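// Invert a condition produced by parseCondBranch/analyzeBranch: a plain Bcc
// condition gets its condition code inverted, while folded compare-and-branch
// forms swap their opcode for the opposite polarity (CBZ <-> CBNZ,
// TBZ <-> TBNZ).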
bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
537
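// Erase the terminating branches of MBB: at most an unconditional branch and
// a conditional branch preceding it. Returns the number of branches removed
// and, if requested, the number of bytes freed (4 per branch).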
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;
543
544 if (!isUncondBranchOpcode(I->getOpcode()) &&
545 !isCondBranchOpcode(I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use addOperand instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Cond[3].getImm());
586 MIB.addMBB(TBB);
587 }
588}
589
unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(1).getReg();
625 }
626 return VReg;
627}
628
629// Determine if VReg is defined by an instruction that can be folded into a
630// csel instruction. If so, return the folded opcode, and the replacement
631// register.
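// For example, "%v = ADDWri %x, 1, 0" feeding a select can be folded so that
// the select is emitted as "CSINCWr dst, trueval, %x, cc" instead of
// computing %v separately; ORN/SUB from the zero register fold to
// CSINV/CSNEG in the same way.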
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
647 return 0;
648 // fall-through to ADDXri and ADDWri.
649 [[fallthrough]];
650 case AArch64::ADDXri:
651 case AArch64::ADDWri:
652 // add x, 1 -> csinc.
653 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
654 DefMI->getOperand(3).getImm() != 0)
655 return 0;
656 SrcOpNum = 1;
657 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
658 break;
659
660 case AArch64::ORNXrr:
661 case AArch64::ORNWrr: {
662 // not x -> csinv, represented as orn dst, xzr, src.
663 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
664 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
665 return 0;
666 SrcOpNum = 2;
667 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
668 break;
669 }
670
671 case AArch64::SUBSXrr:
672 case AArch64::SUBSWrr:
673 // if NZCV is used, do not fold.
674 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
675 return 0;
676 // fall-through to SUBXrr and SUBWrr.
677 [[fallthrough]];
678 case AArch64::SUBXrr:
679 case AArch64::SUBWrr: {
680 // neg x -> csneg, represented as sub dst, xzr, src.
681 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
682 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
683 return 0;
684 SrcOpNum = 2;
685 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
686 break;
687 }
688 default:
689 return 0;
690 }
691 assert(Opc && SrcOpNum && "Missing parameters");
692
693 if (NewVReg)
694 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
695 return Opc;
696}
697
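// Report whether a select of TrueReg/FalseReg under Cond can be lowered to a
// single CSEL/FCSEL, and estimate the latencies of the condition and of each
// operand for the caller.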
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       Register DstReg, Register TrueReg,
                                       Register FalseReg, int &CondCycles,
                                       int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
708 if (!RC)
709 return false;
710
711 // Also need to check the dest regclass, in case we're trying to optimize
712 // something like:
713 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
714 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
715 return false;
716
717 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
718 unsigned ExtraCondLat = Cond.size() != 1;
719
720 // GPRs are handled by csel.
721 // FIXME: Fold in x+1, -x, and ~x when applicable.
722 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
723 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
724 // Single-cycle csel, csinc, csinv, and csneg.
725 CondCycles = 1 + ExtraCondLat;
726 TrueCycles = FalseCycles = 1;
727 if (canFoldIntoCSel(MRI, TrueReg))
728 TrueCycles = 0;
729 else if (canFoldIntoCSel(MRI, FalseReg))
730 FalseCycles = 0;
731 return true;
732 }
733
734 // Scalar floating point is handled by fcsel.
735 // FIXME: Form fabs, fmin, and fmax when applicable.
736 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
737 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
738 CondCycles = 5 + ExtraCondLat;
739 TrueCycles = FalseCycles = 2;
740 return true;
741 }
742
743 // Can't do vectors.
744 return false;
745}
746
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
757 default:
758 llvm_unreachable("Unknown condition opcode in Cond");
759 case 1: // b.cc
760 CC = AArch64CC::CondCode(Cond[0].getImm());
761 break;
762 case 3: { // cbz/cbnz
763 // We must insert a compare against 0.
764 bool Is64Bit;
765 switch (Cond[1].getImm()) {
766 default:
767 llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
785 Register SrcReg = Cond[2].getReg();
786 if (Is64Bit) {
787 // cmp reg, #0 is actually subs xzr, reg, #0.
788 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
789 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
790 .addReg(SrcReg)
791 .addImm(0)
792 .addImm(0);
793 } else {
794 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
795 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
796 .addReg(SrcReg)
797 .addImm(0)
798 .addImm(0);
799 }
800 break;
801 }
802 case 4: { // tbz/tbnz
803 // We must insert a tst instruction.
804 switch (Cond[1].getImm()) {
805 default:
806 llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
816 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
817 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
818 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
819 .addReg(Cond[2].getReg())
820 .addImm(
821 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
822 else
823 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
824 .addReg(Cond[2].getReg())
825 .addImm(
826 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
827 break;
828 }
829 }
830
831 unsigned Opc = 0;
832 const TargetRegisterClass *RC = nullptr;
833 bool TryFold = false;
834 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
835 RC = &AArch64::GPR64RegClass;
836 Opc = AArch64::CSELXr;
837 TryFold = true;
838 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
839 RC = &AArch64::GPR32RegClass;
840 Opc = AArch64::CSELWr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
843 RC = &AArch64::FPR64RegClass;
844 Opc = AArch64::FCSELDrrr;
845 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
846 RC = &AArch64::FPR32RegClass;
847 Opc = AArch64::FCSELSrrr;
848 }
849 assert(RC && "Unsupported regclass");
850
851 // Try folding simple instructions into the csel.
852 if (TryFold) {
853 unsigned NewVReg = 0;
854 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
861 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
862
863 // Fold the operation. Leave any dead instructions for DCE to clean up.
864 if (FoldedOpc) {
865 FalseReg = NewVReg;
866 Opc = FoldedOpc;
      // This extends the live range of NewVReg.
868 MRI.clearKillFlags(NewVReg);
869 }
870 }
871
  // Pull all virtual registers into the appropriate class.
873 MRI.constrainRegClass(TrueReg, RC);
874 MRI.constrainRegClass(FalseReg, RC);
875
876 // Insert the csel.
877 BuildMI(MBB, I, DL, get(Opc), DstReg)
878 .addReg(TrueReg)
879 .addReg(FalseReg)
880 .addImm(CC);
881}
882
883// Return true if Imm can be loaded into a register by a "cheap" sequence of
884// instructions. For now, "cheap" means at most two instructions.
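// For example, 0x0000000000abcdef expands to MOVZ+MOVK (two instructions, so
// it is cheap), while an arbitrary constant such as 0x123456789abcdef0 needs
// MOVZ plus three MOVKs and is not.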
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
  if (BitSize == 32)
    return true;

  assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
  uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);

  return Is.size() <= 2;
}
896
897// FIXME: this implementation should be micro-architecture dependent, so a
898// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (Subtarget.hasExynosCheapAsMoveHandling()) {
901 if (isExynosCheapAsMove(MI))
902 return true;
903 return MI.isAsCheapAsAMove();
904 }
905
906 switch (MI.getOpcode()) {
907 default:
908 return MI.isAsCheapAsAMove();
909
910 case AArch64::ADDWrs:
911 case AArch64::ADDXrs:
912 case AArch64::SUBWrs:
913 case AArch64::SUBXrs:
914 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
915
916 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
917 // ORRXri, it is as cheap as MOV.
918 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
919 case AArch64::MOVi32imm:
920 return isCheapImmediate(MI, 32);
921 case AArch64::MOVi64imm:
922 return isCheapImmediate(MI, 64);
923 }
924}
925
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
928 default:
929 return false;
930
931 case AArch64::ADDWrs:
932 case AArch64::ADDXrs:
933 case AArch64::ADDSWrs:
934 case AArch64::ADDSXrs: {
935 unsigned Imm = MI.getOperand(3).getImm();
936 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
937 if (ShiftVal == 0)
938 return true;
939 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
940 }
941
942 case AArch64::ADDWrx:
943 case AArch64::ADDXrx:
944 case AArch64::ADDXrx64:
945 case AArch64::ADDSWrx:
946 case AArch64::ADDSXrx:
947 case AArch64::ADDSXrx64: {
948 unsigned Imm = MI.getOperand(3).getImm();
949 switch (AArch64_AM::getArithExtendType(Imm)) {
950 default:
951 return false;
952 case AArch64_AM::UXTB:
953 case AArch64_AM::UXTH:
954 case AArch64_AM::UXTW:
955 case AArch64_AM::UXTX:
956 return AArch64_AM::getArithShiftValue(Imm) <= 4;
957 }
958 }
959
960 case AArch64::SUBWrs:
961 case AArch64::SUBSWrs: {
962 unsigned Imm = MI.getOperand(3).getImm();
963 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
964 return ShiftVal == 0 ||
965 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
966 }
967
968 case AArch64::SUBXrs:
969 case AArch64::SUBSXrs: {
970 unsigned Imm = MI.getOperand(3).getImm();
971 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
972 return ShiftVal == 0 ||
973 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
974 }
975
976 case AArch64::SUBWrx:
977 case AArch64::SUBXrx:
978 case AArch64::SUBXrx64:
979 case AArch64::SUBSWrx:
980 case AArch64::SUBSXrx:
981 case AArch64::SUBSXrx64: {
982 unsigned Imm = MI.getOperand(3).getImm();
983 switch (AArch64_AM::getArithExtendType(Imm)) {
984 default:
985 return false;
986 case AArch64_AM::UXTB:
987 case AArch64_AM::UXTH:
988 case AArch64_AM::UXTW:
989 case AArch64_AM::UXTX:
990 return AArch64_AM::getArithShiftValue(Imm) == 0;
991 }
992 }
993
994 case AArch64::LDRBBroW:
995 case AArch64::LDRBBroX:
996 case AArch64::LDRBroW:
997 case AArch64::LDRBroX:
998 case AArch64::LDRDroW:
999 case AArch64::LDRDroX:
1000 case AArch64::LDRHHroW:
1001 case AArch64::LDRHHroX:
1002 case AArch64::LDRHroW:
1003 case AArch64::LDRHroX:
1004 case AArch64::LDRQroW:
1005 case AArch64::LDRQroX:
1006 case AArch64::LDRSBWroW:
1007 case AArch64::LDRSBWroX:
1008 case AArch64::LDRSBXroW:
1009 case AArch64::LDRSBXroX:
1010 case AArch64::LDRSHWroW:
1011 case AArch64::LDRSHWroX:
1012 case AArch64::LDRSHXroW:
1013 case AArch64::LDRSHXroX:
1014 case AArch64::LDRSWroW:
1015 case AArch64::LDRSWroX:
1016 case AArch64::LDRSroW:
1017 case AArch64::LDRSroX:
1018 case AArch64::LDRWroW:
1019 case AArch64::LDRWroX:
1020 case AArch64::LDRXroW:
1021 case AArch64::LDRXroX:
1022 case AArch64::PRFMroW:
1023 case AArch64::PRFMroX:
1024 case AArch64::STRBBroW:
1025 case AArch64::STRBBroX:
1026 case AArch64::STRBroW:
1027 case AArch64::STRBroX:
1028 case AArch64::STRDroW:
1029 case AArch64::STRDroX:
1030 case AArch64::STRHHroW:
1031 case AArch64::STRHHroX:
1032 case AArch64::STRHroW:
1033 case AArch64::STRHroX:
1034 case AArch64::STRQroW:
1035 case AArch64::STRQroX:
1036 case AArch64::STRSroW:
1037 case AArch64::STRSroX:
1038 case AArch64::STRWroW:
1039 case AArch64::STRWroX:
1040 case AArch64::STRXroW:
1041 case AArch64::STRXroX: {
1042 unsigned IsSigned = MI.getOperand(3).getImm();
1043 return !IsSigned;
1044 }
1045 }
1046}
1047
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
1050 switch (Opc) {
1051 default:
1052 return false;
1053 case AArch64::SEH_StackAlloc:
1054 case AArch64::SEH_SaveFPLR:
1055 case AArch64::SEH_SaveFPLR_X:
1056 case AArch64::SEH_SaveReg:
1057 case AArch64::SEH_SaveReg_X:
1058 case AArch64::SEH_SaveRegP:
1059 case AArch64::SEH_SaveRegP_X:
1060 case AArch64::SEH_SaveFReg:
1061 case AArch64::SEH_SaveFReg_X:
1062 case AArch64::SEH_SaveFRegP:
1063 case AArch64::SEH_SaveFRegP_X:
1064 case AArch64::SEH_SetFP:
1065 case AArch64::SEH_AddFP:
1066 case AArch64::SEH_Nop:
1067 case AArch64::SEH_PrologEnd:
1068 case AArch64::SEH_EpilogStart:
1069 case AArch64::SEH_EpilogEnd:
1070 case AArch64::SEH_PACSignLR:
1071 case AArch64::SEH_SaveAnyRegQP:
1072 case AArch64::SEH_SaveAnyRegQPX:
1073 return true;
1074 }
1075}
1076
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             Register &SrcReg, Register &DstReg,
                                             unsigned &SubIdx) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 return false;
1083 case AArch64::SBFMXri: // aka sxtw
1084 case AArch64::UBFMXri: // aka uxtw
1085 // Check for the 32 -> 64 bit extension case, these instructions can do
1086 // much more.
1087 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1088 return false;
1089 // This is a signed or unsigned 32 -> 64 bit extension.
1090 SrcReg = MI.getOperand(1).getReg();
1091 DstReg = MI.getOperand(0).getReg();
1092 SubIdx = AArch64::sub_32;
1093 return true;
1094 }
1095}
1096
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  TypeSize WidthA(0, false), WidthB(0, false);
  bool OffsetAIsScalable = false, OffsetBIsScalable = false;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;
1111
1112 // Retrieve the base, offset from the base and width. Width
1113 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of a lower memory access +
1115 // the width doesn't overlap the offset of a higher memory access,
1116 // then the memory accesses are different.
1117 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1118 // are assumed to have the same scale (vscale).
1119 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1120 WidthA, TRI) &&
1121 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1122 WidthB, TRI)) {
1123 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1124 OffsetAIsScalable == OffsetBIsScalable) {
1125 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1126 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1127 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1128 if (LowWidth.isScalable() == OffsetAIsScalable &&
1129 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1130 return true;
1131 }
1132 }
1133 return false;
1134}
1135
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
1141
1142 // Do not move an instruction that can be recognized as a branch target.
1143 if (hasBTISemantics(MI))
1144 return true;
1145
1146 switch (MI.getOpcode()) {
1147 case AArch64::HINT:
1148 // CSDB hints are scheduling barriers.
1149 if (MI.getOperand(0).getImm() == 0x14)
1150 return true;
1151 break;
1152 case AArch64::DSB:
1153 case AArch64::ISB:
1154 // DSB and ISB also are scheduling barriers.
1155 return true;
1156 case AArch64::MSRpstatesvcrImm1:
1157 // SMSTART and SMSTOP are also scheduling barriers.
1158 return true;
1159 default:;
1160 }
1161 if (isSEHInstruction(MI))
1162 return true;
1163 auto Next = std::next(MI.getIterator());
1164 return Next != MBB->end() && Next->isCFIInstruction();
1165}
1166
1167/// analyzeCompare - For a comparison instruction, return the source registers
1168/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1169/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
1173 // The first operand can be a frame index where we'd normally expect a
1174 // register.
1175 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1176 if (!MI.getOperand(1).isReg())
1177 return false;
1178
1179 switch (MI.getOpcode()) {
1180 default:
1181 break;
1182 case AArch64::PTEST_PP:
1183 case AArch64::PTEST_PP_ANY:
1184 SrcReg = MI.getOperand(0).getReg();
1185 SrcReg2 = MI.getOperand(1).getReg();
1186 // Not sure about the mask and value for now...
1187 CmpMask = ~0;
1188 CmpValue = 0;
1189 return true;
1190 case AArch64::SUBSWrr:
1191 case AArch64::SUBSWrs:
1192 case AArch64::SUBSWrx:
1193 case AArch64::SUBSXrr:
1194 case AArch64::SUBSXrs:
1195 case AArch64::SUBSXrx:
1196 case AArch64::ADDSWrr:
1197 case AArch64::ADDSWrs:
1198 case AArch64::ADDSWrx:
1199 case AArch64::ADDSXrr:
1200 case AArch64::ADDSXrs:
1201 case AArch64::ADDSXrx:
1202 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1203 SrcReg = MI.getOperand(1).getReg();
1204 SrcReg2 = MI.getOperand(2).getReg();
1205 CmpMask = ~0;
1206 CmpValue = 0;
1207 return true;
1208 case AArch64::SUBSWri:
1209 case AArch64::ADDSWri:
1210 case AArch64::SUBSXri:
1211 case AArch64::ADDSXri:
1212 SrcReg = MI.getOperand(1).getReg();
1213 SrcReg2 = 0;
1214 CmpMask = ~0;
1215 CmpValue = MI.getOperand(2).getImm();
1216 return true;
1217 case AArch64::ANDSWri:
1218 case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = AArch64_AM::decodeLogicalImmediate(
        MI.getOperand(2).getImm(),
        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1227 return true;
1228 }
1229
1230 return false;
1231}
1232
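// Re-constrain the register operands of Instr to the register classes
// required by its (possibly just updated) MCInstrDesc. Returns false if a
// physical register is not in the required class or a virtual register
// cannot be constrained to it.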
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

1242 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1243 ++OpIdx) {
1244 MachineOperand &MO = Instr.getOperand(OpIdx);
1245 const TargetRegisterClass *OpRegCstraints =
1246 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1247
1248 // If there's no constraint, there's nothing to do.
1249 if (!OpRegCstraints)
1250 continue;
1251 // If the operand is a frame index, there's nothing to do here.
1252 // A frame index operand will resolve correctly during PEI.
1253 if (MO.isFI())
1254 continue;
1255
1256 assert(MO.isReg() &&
1257 "Operand has register constraints without being a register!");
1258
1259 Register Reg = MO.getReg();
1260 if (Reg.isPhysical()) {
1261 if (!OpRegCstraints->contains(Reg))
1262 return false;
1263 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1264 !MRI->constrainRegClass(Reg, OpRegCstraints))
1265 return false;
1266 }
1267
1268 return true;
1269}
1270
1271/// Return the opcode that does not set flags when possible - otherwise
1272/// return the original opcode. The caller is responsible to do the actual
1273/// substitution and legality checking.
unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
1276 // encoding becomes the sp register.
1277 bool MIDefinesZeroReg = false;
1278 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1279 MIDefinesZeroReg = true;
1280
1281 switch (MI.getOpcode()) {
1282 default:
1283 return MI.getOpcode();
1284 case AArch64::ADDSWrr:
1285 return AArch64::ADDWrr;
1286 case AArch64::ADDSWri:
1287 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1288 case AArch64::ADDSWrs:
1289 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1290 case AArch64::ADDSWrx:
1291 return AArch64::ADDWrx;
1292 case AArch64::ADDSXrr:
1293 return AArch64::ADDXrr;
1294 case AArch64::ADDSXri:
1295 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1296 case AArch64::ADDSXrs:
1297 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1298 case AArch64::ADDSXrx:
1299 return AArch64::ADDXrx;
1300 case AArch64::SUBSWrr:
1301 return AArch64::SUBWrr;
1302 case AArch64::SUBSWri:
1303 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1304 case AArch64::SUBSWrs:
1305 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1306 case AArch64::SUBSWrx:
1307 return AArch64::SUBWrx;
1308 case AArch64::SUBSXrr:
1309 return AArch64::SUBXrr;
1310 case AArch64::SUBSXri:
1311 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1312 case AArch64::SUBSXrs:
1313 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1314 case AArch64::SUBSXrx:
1315 return AArch64::SUBXrx;
1316 }
1317}
1318
1319enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1320
1321/// True when condition flags are accessed (either by writing or reading)
1322/// on the instruction trace starting at From and ending at To.
1323///
1324/// Note: If From and To are from different blocks it's assumed CC are accessed
1325/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1329 // Early exit if To is at the beginning of the BB.
1330 if (To == To->getParent()->begin())
1331 return true;
1332
1333 // Check whether the instructions are in the same basic block
1334 // If not, assume the condition flags might get modified somewhere.
1335 if (To->getParent() != From->getParent())
1336 return true;
1337
1338 // From must be above To.
1339 assert(std::any_of(
1340 ++To.getReverse(), To->getParent()->rend(),
1341 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1342
1343 // We iterate backward starting at \p To until we hit \p From.
1344 for (const MachineInstr &Instr :
1345 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1346 if (((AccessToCheck & AK_Write) &&
1347 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1348 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1349 return true;
1350 }
1351 return false;
1352}
1353
1354/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1355/// operation which could set the flags in an identical manner
1356bool AArch64InstrInfo::optimizePTestInstr(
1357 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1358 const MachineRegisterInfo *MRI) const {
1359 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1360 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1361 auto NewOp = Pred->getOpcode();
1362 bool OpChanged = false;
1363
1364 unsigned MaskOpcode = Mask->getOpcode();
1365 unsigned PredOpcode = Pred->getOpcode();
1366 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1367 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1368
1369 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1370 getElementSizeForOpcode(MaskOpcode) ==
1371 getElementSizeForOpcode(PredOpcode) &&
1372 Mask->getOperand(1).getImm() == 31) {
1373 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1374 // redundant since WHILE performs an implicit PTEST with an all active
1375 // mask. Must be an all active predicate of matching element size.
1376
1377 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1378 // PTEST_LIKE instruction uses the same all active mask and the element
1379 // size matches. If the PTEST has a condition of any then it is always
1380 // redundant.
1381 if (PredIsPTestLike) {
1382 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1383 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1384 return false;
1385 }
1386
1387 // Fallthough to simply remove the PTEST.
1388 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1389 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1390 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1391 // instruction that sets the flags as PTEST would. This is only valid when
1392 // the condition is any.
1393
1394 // Fallthough to simply remove the PTEST.
1395 } else if (PredIsPTestLike) {
1396 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1397 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1398 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1399 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1400 // performed by the compare could consider fewer lanes for these element
1401 // sizes.
1402 //
1403 // For example, consider
1404 //
1405 // ptrue p0.b ; P0=1111-1111-1111-1111
1406 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1407 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1408 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1409 // ; ^ last active
1410 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1411 // ; ^ last active
1412 //
1413 // where the compare generates a canonical all active 32-bit predicate
1414 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1415 // active flag, whereas the PTEST instruction with the same mask doesn't.
1416 // For PTEST_ANY this doesn't apply as the flags in this case would be
1417 // identical regardless of element size.
1418 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1419 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1420 if ((Mask != PTestLikeMask) ||
1421 (PredElementSize != AArch64::ElementSizeB &&
1422 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1423 return false;
1424
1425 // Fallthough to simply remove the PTEST.
1426 } else {
1427 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1428 // opcode so the PTEST becomes redundant.
1429 switch (PredOpcode) {
1430 case AArch64::AND_PPzPP:
1431 case AArch64::BIC_PPzPP:
1432 case AArch64::EOR_PPzPP:
1433 case AArch64::NAND_PPzPP:
1434 case AArch64::NOR_PPzPP:
1435 case AArch64::ORN_PPzPP:
1436 case AArch64::ORR_PPzPP:
1437 case AArch64::BRKA_PPzP:
1438 case AArch64::BRKPA_PPzPP:
1439 case AArch64::BRKB_PPzP:
1440 case AArch64::BRKPB_PPzPP:
1441 case AArch64::RDFFR_PPz: {
1442 // Check to see if our mask is the same. If not the resulting flag bits
1443 // may be different and we can't remove the ptest.
1444 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1445 if (Mask != PredMask)
1446 return false;
1447 break;
1448 }
1449 case AArch64::BRKN_PPzP: {
1450 // BRKN uses an all active implicit mask to set flags unlike the other
1451 // flag-setting instructions.
1452 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1453 if ((MaskOpcode != AArch64::PTRUE_B) ||
1454 (Mask->getOperand(1).getImm() != 31))
1455 return false;
1456 break;
1457 }
1458 case AArch64::PTRUE_B:
1459 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1460 break;
1461 default:
1462 // Bail out if we don't recognize the input
1463 return false;
1464 }
1465
1466 NewOp = convertToFlagSettingOpc(PredOpcode);
1467 OpChanged = true;
1468 }
1469
  const TargetRegisterInfo *TRI = &getRegisterInfo();

1472 // If another instruction between Pred and PTest accesses flags, don't remove
1473 // the ptest or update the earlier instruction to modify them.
1474 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1475 return false;
1476
1477 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1478 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1479 // operand to be replaced with an equivalent instruction that also sets the
1480 // flags.
1481 Pred->setDesc(get(NewOp));
1482 PTest->eraseFromParent();
1483 if (OpChanged) {
1484 bool succeeded = UpdateOperandRegClass(*Pred);
1485 (void)succeeded;
1486 assert(succeeded && "Operands have incompatible register classes!");
1487 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1488 }
1489
1490 // Ensure that the flags def is live.
1491 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1492 unsigned i = 0, e = Pred->getNumOperands();
1493 for (; i != e; ++i) {
1494 MachineOperand &MO = Pred->getOperand(i);
1495 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1496 MO.setIsDead(false);
1497 break;
1498 }
1499 }
1500 }
1501 return true;
1502}
1503
1504/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
1508///
1509/// The following steps are tried in order:
1510/// 1. Convert CmpInstr into an unconditional version.
1511/// 2. Remove CmpInstr if above there is an instruction producing a needed
1512/// condition code or an instruction which can be converted into such an
1513/// instruction.
1514/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1517 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1518 assert(CmpInstr.getParent());
1519 assert(MRI);
1520
1521 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1522 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1523 if (DeadNZCVIdx != -1) {
1524 if (CmpInstr.definesRegister(AArch64::WZR) ||
1525 CmpInstr.definesRegister(AArch64::XZR)) {
1526 CmpInstr.eraseFromParent();
1527 return true;
1528 }
1529 unsigned Opc = CmpInstr.getOpcode();
1530 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1531 if (NewOpc == Opc)
1532 return false;
1533 const MCInstrDesc &MCID = get(NewOpc);
1534 CmpInstr.setDesc(MCID);
1535 CmpInstr.removeOperand(DeadNZCVIdx);
1536 bool succeeded = UpdateOperandRegClass(CmpInstr);
1537 (void)succeeded;
1538 assert(succeeded && "Some operands reg class are incompatible!");
1539 return true;
1540 }
1541
1542 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1543 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1544 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1545
1546 if (SrcReg2 != 0)
1547 return false;
1548
1549 // CmpInstr is a Compare instruction if destination register is not used.
1550 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1551 return false;
1552
1553 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1554 return true;
1555 return (CmpValue == 0 || CmpValue == 1) &&
1556 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1557}
1558
1559/// Get opcode of S version of Instr.
1560/// If Instr is S version its opcode is returned.
1561/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1562/// or we are not interested in it.
1563static unsigned sForm(MachineInstr &Instr) {
1564 switch (Instr.getOpcode()) {
1565 default:
1566 return AArch64::INSTRUCTION_LIST_END;
1567
1568 case AArch64::ADDSWrr:
1569 case AArch64::ADDSWri:
1570 case AArch64::ADDSXrr:
1571 case AArch64::ADDSXri:
1572 case AArch64::SUBSWrr:
1573 case AArch64::SUBSWri:
1574 case AArch64::SUBSXrr:
1575 case AArch64::SUBSXri:
1576 return Instr.getOpcode();
1577
1578 case AArch64::ADDWrr:
1579 return AArch64::ADDSWrr;
1580 case AArch64::ADDWri:
1581 return AArch64::ADDSWri;
1582 case AArch64::ADDXrr:
1583 return AArch64::ADDSXrr;
1584 case AArch64::ADDXri:
1585 return AArch64::ADDSXri;
1586 case AArch64::ADCWr:
1587 return AArch64::ADCSWr;
1588 case AArch64::ADCXr:
1589 return AArch64::ADCSXr;
1590 case AArch64::SUBWrr:
1591 return AArch64::SUBSWrr;
1592 case AArch64::SUBWri:
1593 return AArch64::SUBSWri;
1594 case AArch64::SUBXrr:
1595 return AArch64::SUBSXrr;
1596 case AArch64::SUBXri:
1597 return AArch64::SUBSXri;
1598 case AArch64::SBCWr:
1599 return AArch64::SBCSWr;
1600 case AArch64::SBCXr:
1601 return AArch64::SBCSXr;
1602 case AArch64::ANDWri:
1603 return AArch64::ANDSWri;
1604 case AArch64::ANDXri:
1605 return AArch64::ANDSXri;
1606 }
1607}
1608
1609/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
1612 if (BB->isLiveIn(AArch64::NZCV))
1613 return true;
1614 return false;
1615}
1616
1617/// \returns The condition code operand index for \p Instr if it is a branch
1618/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
1622 default:
1623 return -1;
1624
1625 case AArch64::Bcc: {
1626 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1627 assert(Idx >= 2);
1628 return Idx - 2;
1629 }
1630
1631 case AArch64::CSINVWr:
1632 case AArch64::CSINVXr:
1633 case AArch64::CSINCWr:
1634 case AArch64::CSINCXr:
1635 case AArch64::CSELWr:
1636 case AArch64::CSELXr:
1637 case AArch64::CSNEGWr:
1638 case AArch64::CSNEGXr:
1639 case AArch64::FCSELSrrr:
1640 case AArch64::FCSELDrrr: {
1641 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1642 assert(Idx >= 1);
1643 return Idx - 1;
1644 }
1645 }
1646}
1647
1648/// Find a condition code used by the instruction.
1649/// Returns AArch64CC::Invalid if either the instruction does not use condition
1650/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
                          Instr.getOperand(CCIdx).getImm())
                    : AArch64CC::Invalid;
}
1657
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  UsedNZCV UsedFlags;
1661 switch (CC) {
1662 default:
1663 break;
1664
1665 case AArch64CC::EQ: // Z set
1666 case AArch64CC::NE: // Z clear
1667 UsedFlags.Z = true;
1668 break;
1669
1670 case AArch64CC::HI: // Z clear and C set
1671 case AArch64CC::LS: // Z set or C clear
1672 UsedFlags.Z = true;
1673 [[fallthrough]];
1674 case AArch64CC::HS: // C set
1675 case AArch64CC::LO: // C clear
1676 UsedFlags.C = true;
1677 break;
1678
1679 case AArch64CC::MI: // N set
1680 case AArch64CC::PL: // N clear
1681 UsedFlags.N = true;
1682 break;
1683
1684 case AArch64CC::VS: // V set
1685 case AArch64CC::VC: // V clear
1686 UsedFlags.V = true;
1687 break;
1688
1689 case AArch64CC::GT: // Z clear, N and V the same
1690 case AArch64CC::LE: // Z set, N and V differ
1691 UsedFlags.Z = true;
1692 [[fallthrough]];
1693 case AArch64CC::GE: // N and V the same
1694 case AArch64CC::LT: // N and V differ
1695 UsedFlags.N = true;
1696 UsedFlags.V = true;
1697 break;
1698 }
1699 return UsedFlags;
1700}
1701
/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                       const TargetRegisterInfo &TRI,
                       SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1711 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1712 if (MI.getParent() != CmpParent)
1713 return std::nullopt;
1714
1715 if (areCFlagsAliveInSuccessors(CmpParent))
1716 return std::nullopt;
1717
  UsedNZCV NZCVUsedAfterCmp;
  for (MachineInstr &Instr : instructionsWithoutDebug(
           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
    if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return std::nullopt;
1725 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1726 if (CCUseInstrs)
1727 CCUseInstrs->push_back(&Instr);
1728 }
1729 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1730 break;
1731 }
1732 return NZCVUsedAfterCmp;
1733}
1734
1735static bool isADDSRegImm(unsigned Opcode) {
1736 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1737}
1738
1739static bool isSUBSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1741}
1742
1743/// Check if CmpInstr can be substituted by MI.
1744///
1745/// CmpInstr can be substituted:
1746/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1747/// - and, MI and CmpInstr are from the same MachineBB
1748/// - and, condition flags are not alive in successors of the CmpInstr parent
1749/// - and, if MI opcode is the S form there must be no defs of flags between
1750/// MI and CmpInstr
1751/// or if MI opcode is not the S form there must be neither defs of flags
1752/// nor uses of flags between MI and CmpInstr.
1753/// - and, if C/V flags are not used after CmpInstr
1754/// or if N flag is used but MI produces poison value if signed overflow
1755/// occurs.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                       const TargetRegisterInfo &TRI) {
1758 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1759 // that may or may not set flags.
1760 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1761
1762 const unsigned CmpOpcode = CmpInstr.getOpcode();
1763 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1764 return false;
1765
1766 assert((CmpInstr.getOperand(2).isImm() &&
1767 CmpInstr.getOperand(2).getImm() == 0) &&
1768 "Caller guarantees that CmpInstr compares with constant 0");
1769
1770 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1771 if (!NZVCUsed || NZVCUsed->C)
1772 return false;
1773
1774 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1775 // '%vreg = add ...' or '%vreg = sub ...'.
1776 // Condition flag V is used to indicate signed overflow.
1777 // 1) MI and CmpInstr set N and V to the same value.
1778 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1779 // signed overflow occurs, so CmpInstr could still be simplified away.
1780 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1781 return false;
1782
1783 AccessKind AccessToCheck = AK_Write;
1784 if (sForm(MI) != MI.getOpcode())
1785 AccessToCheck = AK_All;
1786 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1787}
1788
1789/// Substitute an instruction comparing to zero with another instruction
1790/// which produces needed condition flags.
1791///
1792/// Return true on success.
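///
/// For example:
///   and  w8, w0, #0x0f
///   cmp  w8, #0
///   b.eq <target>
/// becomes
///   ands w8, w0, #0x0f
///   b.eq <target>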
1793bool AArch64InstrInfo::substituteCmpToZero(
1794 MachineInstr &CmpInstr, unsigned SrcReg,
1795 const MachineRegisterInfo &MRI) const {
1796 // Get the unique definition of SrcReg.
1797 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1798 if (!MI)
1799 return false;
1800
  const TargetRegisterInfo &TRI = getRegisterInfo();

1803 unsigned NewOpc = sForm(*MI);
1804 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1805 return false;
1806
1807 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1808 return false;
1809
1810 // Update the instruction to set NZCV.
1811 MI->setDesc(get(NewOpc));
1812 CmpInstr.eraseFromParent();
1813 bool succeeded = UpdateOperandRegClass(*MI);
1814 (void)succeeded;
1815 assert(succeeded && "Some operands reg class are incompatible!");
1816 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1817 return true;
1818}
1819
1820/// \returns True if \p CmpInstr can be removed.
1821///
1822/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1823/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
                                 int CmpValue, const TargetRegisterInfo &TRI,
                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
                                 bool &IsInvertCC) {
1828 assert((CmpValue == 0 || CmpValue == 1) &&
1829 "Only comparisons to 0 or 1 considered for removal!");
1830
1831 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1832 unsigned MIOpc = MI.getOpcode();
1833 if (MIOpc == AArch64::CSINCWr) {
1834 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1835 MI.getOperand(2).getReg() != AArch64::WZR)
1836 return false;
1837 } else if (MIOpc == AArch64::CSINCXr) {
1838 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1839 MI.getOperand(2).getReg() != AArch64::XZR)
1840 return false;
1841 } else {
1842 return false;
1843 }
1844 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1845 if (MICC == AArch64CC::Invalid)
1846 return false;
1847
1848 // NZCV needs to be defined
1849 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1850 return false;
1851
1852 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1853 const unsigned CmpOpcode = CmpInstr.getOpcode();
1854 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1855 if (CmpValue && !IsSubsRegImm)
1856 return false;
1857 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1858 return false;
1859
1860 // MI conditions allowed: eq, ne, mi, pl
1861 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1862 if (MIUsedNZCV.C || MIUsedNZCV.V)
1863 return false;
1864
1865 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1866 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1867 // Condition flags are not used in CmpInstr's basic block successors, and only
1868 // the Z or N flags are allowed to be used after CmpInstr within its basic block.
1869 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1870 return false;
1871 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1872 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1873 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1874 return false;
1875 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne.
1876 if (MIUsedNZCV.N && !CmpValue)
1877 return false;
1878
1879 // There must be no defs of flags between MI and CmpInstr
1880 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1881 return false;
1882
1883 // Condition code is inverted in the following cases:
1884 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1885 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1886 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1887 (!CmpValue && MICC == AArch64CC::NE);
1888 return true;
1889}
1890
1891/// Remove comparison in csinc-cmp sequence
1892///
1893/// Examples:
1894/// 1. \code
1895/// csinc w9, wzr, wzr, ne
1896/// cmp w9, #0
1897/// b.eq
1898/// \endcode
1899/// to
1900/// \code
1901/// csinc w9, wzr, wzr, ne
1902/// b.ne
1903/// \endcode
1904///
1905/// 2. \code
1906/// csinc x2, xzr, xzr, mi
1907/// cmp x2, #1
1908/// b.pl
1909/// \endcode
1910/// to
1911/// \code
1912/// csinc x2, xzr, xzr, mi
1913/// b.pl
1914/// \endcode
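/// Note: 'csinc Xd, xzr, xzr, cc' materializes (cc ? 0 : 1). In the first
/// example the branch fires when w9 == 0, i.e. exactly when 'ne' held, so the
/// user's condition must be inverted once the compare is removed. In the
/// second example 'x2 == 1' holds exactly when 'mi' did not, which is what
/// 'pl' on the original flags already tests, so no inversion is needed.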
1915///
1916/// \param CmpInstr comparison instruction
1917/// \return True when comparison removed
1918bool AArch64InstrInfo::removeCmpToZeroOrOne(
1919 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1920 const MachineRegisterInfo &MRI) const {
1921 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1922 if (!MI)
1923 return false;
1924 const TargetRegisterInfo &TRI = getRegisterInfo();
1925 SmallVector<MachineInstr *, 4> CCUseInstrs;
1926 bool IsInvertCC = false;
1927 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1928 IsInvertCC))
1929 return false;
1930 // Make transformation
1931 CmpInstr.eraseFromParent();
1932 if (IsInvertCC) {
1933 // Invert condition codes in CmpInstr CC users
1934 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1935 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1936 assert(Idx >= 0 && "Unexpected instruction using CC.");
1937 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1938 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1939 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1940 CCOperand.setImm(CCUse);
1941 }
1942 }
1943 return true;
1944}
1945
1946bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1947 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1948 MI.getOpcode() != AArch64::CATCHRET)
1949 return false;
1950
1951 MachineBasicBlock &MBB = *MI.getParent();
1952 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1953 auto TRI = Subtarget.getRegisterInfo();
1954 DebugLoc DL = MI.getDebugLoc();
1955
1956 if (MI.getOpcode() == AArch64::CATCHRET) {
1957 // Skip to the first instruction before the epilog.
1958 const TargetInstrInfo *TII =
1959 MBB.getParent()->getSubtarget().getInstrInfo();
1960 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1961 auto MBBI = MachineBasicBlock::iterator(MI);
1962 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1963 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1964 FirstEpilogSEH != MBB.begin())
1965 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1966 if (FirstEpilogSEH != MBB.begin())
1967 FirstEpilogSEH = std::next(FirstEpilogSEH);
1968 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1969 .addReg(AArch64::X0, RegState::Define)
1970 .addMBB(TargetMBB);
1971 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1972 .addReg(AArch64::X0, RegState::Define)
1973 .addReg(AArch64::X0)
1974 .addMBB(TargetMBB)
1975 .addImm(0);
1976 return true;
1977 }
1978
1979 Register Reg = MI.getOperand(0).getReg();
1980 Module &M = *MBB.getParent()->getFunction().getParent();
1981 if (M.getStackProtectorGuard() == "sysreg") {
1982 const AArch64SysReg::SysReg *SrcReg =
1983 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1984 if (!SrcReg)
1985 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1986
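// As an illustration (a typical kernel-style configuration): with
// -mstack-protector-guard=sysreg, -mstack-protector-guard-reg=sp_el0 and
// -mstack-protector-guard-offset=8, the code below expands LOAD_STACK_GUARD
// roughly into:
//   mrs xN, SP_EL0
//   ldr xN, [xN, #8]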
1987 // mrs xN, sysreg
1988 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1989 .addDef(Reg)
1990 .addImm(SrcReg->Encoding);
1991 int Offset = M.getStackProtectorGuardOffset();
1992 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1993 // ldr xN, [xN, #offset]
1994 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1995 .addDef(Reg)
1996 .addUse(Reg, RegState::Kill)
1997 .addImm(Offset / 8);
1998 } else if (Offset >= -256 && Offset <= 255) {
1999 // ldur xN, [xN, #offset]
2000 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2001 .addDef(Reg)
2002 .addUse(Reg, RegState::Kill)
2003 .addImm(Offset);
2004 } else if (Offset >= -4095 && Offset <= 4095) {
2005 if (Offset > 0) {
2006 // add xN, xN, #offset
2007 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2008 .addDef(Reg)
2009 .addUse(Reg, RegState::Kill)
2010 .addImm(Offset)
2011 .addImm(0);
2012 } else {
2013 // sub xN, xN, #offset
2014 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2015 .addDef(Reg)
2016 .addUse(Reg, RegState::Kill)
2017 .addImm(-Offset)
2018 .addImm(0);
2019 }
2020 // ldr xN, [xN]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(0);
2025 } else {
2026 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2027 // than 32760.
2028 // It might be nice to use AArch64::MOVi32imm here, which would get
2029 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2030 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2031 // AArch64FrameLowering might help us find such a scratch register
2032 // though. If we failed to find a scratch register, we could emit a
2033 // stream of add instructions to build up the immediate. Or, we could try
2034 // to insert a AArch64::MOVi32imm before register allocation so that we
2035 // didn't need to scavenge for a scratch register.
2036 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2037 }
2038 MBB.erase(MI);
2039 return true;
2040 }
2041
2042 const GlobalValue *GV =
2043 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2044 const TargetMachine &TM = MBB.getParent()->getTarget();
2045 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2046 const unsigned char MO_NC = AArch64II::MO_NC;
2047
2048 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2049 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2050 .addGlobalAddress(GV, 0, OpFlags);
2051 if (Subtarget.isTargetILP32()) {
2052 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2053 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2054 .addDef(Reg32, RegState::Dead)
2055 .addUse(Reg, RegState::Kill)
2056 .addImm(0)
2057 .addMemOperand(*MI.memoperands_begin())
2058 .addDef(Reg, RegState::Implicit);
2059 } else {
2060 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2061 .addReg(Reg, RegState::Kill)
2062 .addImm(0)
2063 .addMemOperand(*MI.memoperands_begin());
2064 }
2065 } else if (TM.getCodeModel() == CodeModel::Large) {
2066 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2067 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2068 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2069 .addImm(0);
2070 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2071 .addReg(Reg, RegState::Kill)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2073 .addImm(16);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2077 .addImm(32);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2081 .addImm(48);
2082 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2083 .addReg(Reg, RegState::Kill)
2084 .addImm(0)
2085 .addMemOperand(*MI.memoperands_begin());
2086 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2087 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2088 .addGlobalAddress(GV, 0, OpFlags);
2089 } else {
2090 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2091 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2092 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2093 if (Subtarget.isTargetILP32()) {
2094 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2095 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2096 .addDef(Reg32, RegState::Dead)
2097 .addUse(Reg, RegState::Kill)
2098 .addGlobalAddress(GV, 0, LoFlags)
2099 .addMemOperand(*MI.memoperands_begin())
2100 .addDef(Reg, RegState::Implicit);
2101 } else {
2102 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2103 .addReg(Reg, RegState::Kill)
2104 .addGlobalAddress(GV, 0, LoFlags)
2105 .addMemOperand(*MI.memoperands_begin());
2106 }
2107 }
2108
2109 MBB.erase(MI);
2110
2111 return true;
2112}
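// For illustration, outside the GOT/large/tiny special cases above the
// LOAD_STACK_GUARD pseudo is expanded roughly into:
//   adrp xN, __stack_chk_guard
//   ldr  xN, [xN, :lo12:__stack_chk_guard]
// i.e. an ADRP of the guard variable's page followed by a page-offset load.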
2113
2114// Return true if this instruction simply sets its single destination register
2115// to zero. This is equivalent to a register rename of the zero-register.
2116static bool isGPRZero(const MachineInstr &MI) {
2117 switch (MI.getOpcode()) {
2118 default:
2119 break;
2120 case AArch64::MOVZWi:
2121 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2122 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2123 assert(MI.getDesc().getNumOperands() == 3 &&
2124 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2125 return true;
2126 }
2127 break;
2128 case AArch64::ANDWri: // and Rd, Rzr, #imm
2129 return MI.getOperand(1).getReg() == AArch64::WZR;
2130 case AArch64::ANDXri:
2131 return MI.getOperand(1).getReg() == AArch64::XZR;
2132 case TargetOpcode::COPY:
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 }
2135 return false;
2136}
2137
2138// Return true if this instruction simply renames a general register without
2139// modifying bits.
2140static bool isGPRCopy(const MachineInstr &MI) {
2141 switch (MI.getOpcode()) {
2142 default:
2143 break;
2144 case TargetOpcode::COPY: {
2145 // GPR32 copies will be lowered to ORRXrs
2146 Register DstReg = MI.getOperand(0).getReg();
2147 return (AArch64::GPR32RegClass.contains(DstReg) ||
2148 AArch64::GPR64RegClass.contains(DstReg));
2149 }
2150 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2151 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2152 assert(MI.getDesc().getNumOperands() == 4 &&
2153 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2154 return true;
2155 }
2156 break;
2157 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2158 if (MI.getOperand(2).getImm() == 0) {
2159 assert(MI.getDesc().getNumOperands() == 4 &&
2160 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2161 return true;
2162 }
2163 break;
2164 }
2165 return false;
2166}
2167
2168// Return true if this instruction simply renames a general register without
2169// modifying bits.
2170static bool isFPRCopy(const MachineInstr &MI) {
2171 switch (MI.getOpcode()) {
2172 default:
2173 break;
2174 case TargetOpcode::COPY: {
2175 Register DstReg = MI.getOperand(0).getReg();
2176 return AArch64::FPR128RegClass.contains(DstReg);
2177 }
2178 case AArch64::ORRv16i8:
2179 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2180 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2181 "invalid ORRv16i8 operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2190 int &FrameIndex) const {
2191 switch (MI.getOpcode()) {
2192 default:
2193 break;
2194 case AArch64::LDRWui:
2195 case AArch64::LDRXui:
2196 case AArch64::LDRBui:
2197 case AArch64::LDRHui:
2198 case AArch64::LDRSui:
2199 case AArch64::LDRDui:
2200 case AArch64::LDRQui:
2201 case AArch64::LDR_PXI:
2202 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2203 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2204 FrameIndex = MI.getOperand(1).getIndex();
2205 return MI.getOperand(0).getReg();
2206 }
2207 break;
2208 }
2209
2210 return 0;
2211}
2212
2213unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2214 int &FrameIndex) const {
2215 switch (MI.getOpcode()) {
2216 default:
2217 break;
2218 case AArch64::STRWui:
2219 case AArch64::STRXui:
2220 case AArch64::STRBui:
2221 case AArch64::STRHui:
2222 case AArch64::STRSui:
2223 case AArch64::STRDui:
2224 case AArch64::STRQui:
2225 case AArch64::STR_PXI:
2226 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2227 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2228 FrameIndex = MI.getOperand(1).getIndex();
2229 return MI.getOperand(0).getReg();
2230 }
2231 break;
2232 }
2233 return 0;
2234}
2235
2236/// Check all MachineMemOperands for a hint to suppress pairing.
2237bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2238 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2239 return MMO->getFlags() & MOSuppressPair;
2240 });
2241}
2242
2243/// Set a flag on the first MachineMemOperand to suppress pairing.
2244void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2245 if (MI.memoperands_empty())
2246 return;
2247 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2248}
2249
2250/// Check all MachineMemOperands for a hint that the load/store is strided.
2251bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2252 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2253 return MMO->getFlags() & MOStridedAccess;
2254 });
2255}
2256
2257bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2258 switch (Opc) {
2259 default:
2260 return false;
2261 case AArch64::STURSi:
2262 case AArch64::STRSpre:
2263 case AArch64::STURDi:
2264 case AArch64::STRDpre:
2265 case AArch64::STURQi:
2266 case AArch64::STRQpre:
2267 case AArch64::STURBBi:
2268 case AArch64::STURHHi:
2269 case AArch64::STURWi:
2270 case AArch64::STRWpre:
2271 case AArch64::STURXi:
2272 case AArch64::STRXpre:
2273 case AArch64::LDURSi:
2274 case AArch64::LDRSpre:
2275 case AArch64::LDURDi:
2276 case AArch64::LDRDpre:
2277 case AArch64::LDURQi:
2278 case AArch64::LDRQpre:
2279 case AArch64::LDURWi:
2280 case AArch64::LDRWpre:
2281 case AArch64::LDURXi:
2282 case AArch64::LDRXpre:
2283 case AArch64::LDRSWpre:
2284 case AArch64::LDURSWi:
2285 case AArch64::LDURHHi:
2286 case AArch64::LDURBBi:
2287 case AArch64::LDURSBWi:
2288 case AArch64::LDURSHWi:
2289 return true;
2290 }
2291}
2292
2293std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2294 switch (Opc) {
2295 default: return {};
2296 case AArch64::PRFMui: return AArch64::PRFUMi;
2297 case AArch64::LDRXui: return AArch64::LDURXi;
2298 case AArch64::LDRWui: return AArch64::LDURWi;
2299 case AArch64::LDRBui: return AArch64::LDURBi;
2300 case AArch64::LDRHui: return AArch64::LDURHi;
2301 case AArch64::LDRSui: return AArch64::LDURSi;
2302 case AArch64::LDRDui: return AArch64::LDURDi;
2303 case AArch64::LDRQui: return AArch64::LDURQi;
2304 case AArch64::LDRBBui: return AArch64::LDURBBi;
2305 case AArch64::LDRHHui: return AArch64::LDURHHi;
2306 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2307 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2308 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2309 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2310 case AArch64::LDRSWui: return AArch64::LDURSWi;
2311 case AArch64::STRXui: return AArch64::STURXi;
2312 case AArch64::STRWui: return AArch64::STURWi;
2313 case AArch64::STRBui: return AArch64::STURBi;
2314 case AArch64::STRHui: return AArch64::STURHi;
2315 case AArch64::STRSui: return AArch64::STURSi;
2316 case AArch64::STRDui: return AArch64::STURDi;
2317 case AArch64::STRQui: return AArch64::STURQi;
2318 case AArch64::STRBBui: return AArch64::STURBBi;
2319 case AArch64::STRHHui: return AArch64::STURHHi;
2320 }
2321}
2322
2323unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2324 switch (Opc) {
2325 default:
2326 return 2;
2327 case AArch64::LDPXi:
2328 case AArch64::LDPDi:
2329 case AArch64::STPXi:
2330 case AArch64::STPDi:
2331 case AArch64::LDNPXi:
2332 case AArch64::LDNPDi:
2333 case AArch64::STNPXi:
2334 case AArch64::STNPDi:
2335 case AArch64::LDPQi:
2336 case AArch64::STPQi:
2337 case AArch64::LDNPQi:
2338 case AArch64::STNPQi:
2339 case AArch64::LDPWi:
2340 case AArch64::LDPSi:
2341 case AArch64::STPWi:
2342 case AArch64::STPSi:
2343 case AArch64::LDNPWi:
2344 case AArch64::LDNPSi:
2345 case AArch64::STNPWi:
2346 case AArch64::STNPSi:
2347 case AArch64::LDG:
2348 case AArch64::STGPi:
2349
2350 case AArch64::LD1B_IMM:
2351 case AArch64::LD1B_H_IMM:
2352 case AArch64::LD1B_S_IMM:
2353 case AArch64::LD1B_D_IMM:
2354 case AArch64::LD1SB_H_IMM:
2355 case AArch64::LD1SB_S_IMM:
2356 case AArch64::LD1SB_D_IMM:
2357 case AArch64::LD1H_IMM:
2358 case AArch64::LD1H_S_IMM:
2359 case AArch64::LD1H_D_IMM:
2360 case AArch64::LD1SH_S_IMM:
2361 case AArch64::LD1SH_D_IMM:
2362 case AArch64::LD1W_IMM:
2363 case AArch64::LD1W_D_IMM:
2364 case AArch64::LD1SW_D_IMM:
2365 case AArch64::LD1D_IMM:
2366
2367 case AArch64::LD2B_IMM:
2368 case AArch64::LD2H_IMM:
2369 case AArch64::LD2W_IMM:
2370 case AArch64::LD2D_IMM:
2371 case AArch64::LD3B_IMM:
2372 case AArch64::LD3H_IMM:
2373 case AArch64::LD3W_IMM:
2374 case AArch64::LD3D_IMM:
2375 case AArch64::LD4B_IMM:
2376 case AArch64::LD4H_IMM:
2377 case AArch64::LD4W_IMM:
2378 case AArch64::LD4D_IMM:
2379
2380 case AArch64::ST1B_IMM:
2381 case AArch64::ST1B_H_IMM:
2382 case AArch64::ST1B_S_IMM:
2383 case AArch64::ST1B_D_IMM:
2384 case AArch64::ST1H_IMM:
2385 case AArch64::ST1H_S_IMM:
2386 case AArch64::ST1H_D_IMM:
2387 case AArch64::ST1W_IMM:
2388 case AArch64::ST1W_D_IMM:
2389 case AArch64::ST1D_IMM:
2390
2391 case AArch64::ST2B_IMM:
2392 case AArch64::ST2H_IMM:
2393 case AArch64::ST2W_IMM:
2394 case AArch64::ST2D_IMM:
2395 case AArch64::ST3B_IMM:
2396 case AArch64::ST3H_IMM:
2397 case AArch64::ST3W_IMM:
2398 case AArch64::ST3D_IMM:
2399 case AArch64::ST4B_IMM:
2400 case AArch64::ST4H_IMM:
2401 case AArch64::ST4W_IMM:
2402 case AArch64::ST4D_IMM:
2403
2404 case AArch64::LD1RB_IMM:
2405 case AArch64::LD1RB_H_IMM:
2406 case AArch64::LD1RB_S_IMM:
2407 case AArch64::LD1RB_D_IMM:
2408 case AArch64::LD1RSB_H_IMM:
2409 case AArch64::LD1RSB_S_IMM:
2410 case AArch64::LD1RSB_D_IMM:
2411 case AArch64::LD1RH_IMM:
2412 case AArch64::LD1RH_S_IMM:
2413 case AArch64::LD1RH_D_IMM:
2414 case AArch64::LD1RSH_S_IMM:
2415 case AArch64::LD1RSH_D_IMM:
2416 case AArch64::LD1RW_IMM:
2417 case AArch64::LD1RW_D_IMM:
2418 case AArch64::LD1RSW_IMM:
2419 case AArch64::LD1RD_IMM:
2420
2421 case AArch64::LDNT1B_ZRI:
2422 case AArch64::LDNT1H_ZRI:
2423 case AArch64::LDNT1W_ZRI:
2424 case AArch64::LDNT1D_ZRI:
2425 case AArch64::STNT1B_ZRI:
2426 case AArch64::STNT1H_ZRI:
2427 case AArch64::STNT1W_ZRI:
2428 case AArch64::STNT1D_ZRI:
2429
2430 case AArch64::LDNF1B_IMM:
2431 case AArch64::LDNF1B_H_IMM:
2432 case AArch64::LDNF1B_S_IMM:
2433 case AArch64::LDNF1B_D_IMM:
2434 case AArch64::LDNF1SB_H_IMM:
2435 case AArch64::LDNF1SB_S_IMM:
2436 case AArch64::LDNF1SB_D_IMM:
2437 case AArch64::LDNF1H_IMM:
2438 case AArch64::LDNF1H_S_IMM:
2439 case AArch64::LDNF1H_D_IMM:
2440 case AArch64::LDNF1SH_S_IMM:
2441 case AArch64::LDNF1SH_D_IMM:
2442 case AArch64::LDNF1W_IMM:
2443 case AArch64::LDNF1W_D_IMM:
2444 case AArch64::LDNF1SW_D_IMM:
2445 case AArch64::LDNF1D_IMM:
2446 return 3;
2447 case AArch64::ADDG:
2448 case AArch64::STGi:
2449 case AArch64::LDR_PXI:
2450 case AArch64::STR_PXI:
2451 return 2;
2452 }
2453}
2454
2455bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2456 switch (MI.getOpcode()) {
2457 default:
2458 return false;
2459 // Scaled instructions.
2460 case AArch64::STRSui:
2461 case AArch64::STRDui:
2462 case AArch64::STRQui:
2463 case AArch64::STRXui:
2464 case AArch64::STRWui:
2465 case AArch64::LDRSui:
2466 case AArch64::LDRDui:
2467 case AArch64::LDRQui:
2468 case AArch64::LDRXui:
2469 case AArch64::LDRWui:
2470 case AArch64::LDRSWui:
2471 // Unscaled instructions.
2472 case AArch64::STURSi:
2473 case AArch64::STRSpre:
2474 case AArch64::STURDi:
2475 case AArch64::STRDpre:
2476 case AArch64::STURQi:
2477 case AArch64::STRQpre:
2478 case AArch64::STURWi:
2479 case AArch64::STRWpre:
2480 case AArch64::STURXi:
2481 case AArch64::STRXpre:
2482 case AArch64::LDURSi:
2483 case AArch64::LDRSpre:
2484 case AArch64::LDURDi:
2485 case AArch64::LDRDpre:
2486 case AArch64::LDURQi:
2487 case AArch64::LDRQpre:
2488 case AArch64::LDURWi:
2489 case AArch64::LDRWpre:
2490 case AArch64::LDURXi:
2491 case AArch64::LDRXpre:
2492 case AArch64::LDURSWi:
2493 case AArch64::LDRSWpre:
2494 return true;
2495 }
2496}
2497
2498bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2499 switch (MI.getOpcode()) {
2500 default:
2501 assert((!MI.isCall() || !MI.isReturn()) &&
2502 "Unexpected instruction - was a new tail call opcode introduced?");
2503 return false;
2504 case AArch64::TCRETURNdi:
2505 case AArch64::TCRETURNri:
2506 case AArch64::TCRETURNrix16x17:
2507 case AArch64::TCRETURNrix17:
2508 case AArch64::TCRETURNrinotx16:
2509 case AArch64::TCRETURNriALL:
2510 return true;
2511 }
2512}
2513
2514unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2515 switch (Opc) {
2516 default:
2517 llvm_unreachable("Opcode has no flag setting equivalent!");
2518 // 32-bit cases:
2519 case AArch64::ADDWri:
2520 return AArch64::ADDSWri;
2521 case AArch64::ADDWrr:
2522 return AArch64::ADDSWrr;
2523 case AArch64::ADDWrs:
2524 return AArch64::ADDSWrs;
2525 case AArch64::ADDWrx:
2526 return AArch64::ADDSWrx;
2527 case AArch64::ANDWri:
2528 return AArch64::ANDSWri;
2529 case AArch64::ANDWrr:
2530 return AArch64::ANDSWrr;
2531 case AArch64::ANDWrs:
2532 return AArch64::ANDSWrs;
2533 case AArch64::BICWrr:
2534 return AArch64::BICSWrr;
2535 case AArch64::BICWrs:
2536 return AArch64::BICSWrs;
2537 case AArch64::SUBWri:
2538 return AArch64::SUBSWri;
2539 case AArch64::SUBWrr:
2540 return AArch64::SUBSWrr;
2541 case AArch64::SUBWrs:
2542 return AArch64::SUBSWrs;
2543 case AArch64::SUBWrx:
2544 return AArch64::SUBSWrx;
2545 // 64-bit cases:
2546 case AArch64::ADDXri:
2547 return AArch64::ADDSXri;
2548 case AArch64::ADDXrr:
2549 return AArch64::ADDSXrr;
2550 case AArch64::ADDXrs:
2551 return AArch64::ADDSXrs;
2552 case AArch64::ADDXrx:
2553 return AArch64::ADDSXrx;
2554 case AArch64::ANDXri:
2555 return AArch64::ANDSXri;
2556 case AArch64::ANDXrr:
2557 return AArch64::ANDSXrr;
2558 case AArch64::ANDXrs:
2559 return AArch64::ANDSXrs;
2560 case AArch64::BICXrr:
2561 return AArch64::BICSXrr;
2562 case AArch64::BICXrs:
2563 return AArch64::BICSXrs;
2564 case AArch64::SUBXri:
2565 return AArch64::SUBSXri;
2566 case AArch64::SUBXrr:
2567 return AArch64::SUBSXrr;
2568 case AArch64::SUBXrs:
2569 return AArch64::SUBSXrs;
2570 case AArch64::SUBXrx:
2571 return AArch64::SUBSXrx;
2572 // SVE instructions:
2573 case AArch64::AND_PPzPP:
2574 return AArch64::ANDS_PPzPP;
2575 case AArch64::BIC_PPzPP:
2576 return AArch64::BICS_PPzPP;
2577 case AArch64::EOR_PPzPP:
2578 return AArch64::EORS_PPzPP;
2579 case AArch64::NAND_PPzPP:
2580 return AArch64::NANDS_PPzPP;
2581 case AArch64::NOR_PPzPP:
2582 return AArch64::NORS_PPzPP;
2583 case AArch64::ORN_PPzPP:
2584 return AArch64::ORNS_PPzPP;
2585 case AArch64::ORR_PPzPP:
2586 return AArch64::ORRS_PPzPP;
2587 case AArch64::BRKA_PPzP:
2588 return AArch64::BRKAS_PPzP;
2589 case AArch64::BRKPA_PPzPP:
2590 return AArch64::BRKPAS_PPzPP;
2591 case AArch64::BRKB_PPzP:
2592 return AArch64::BRKBS_PPzP;
2593 case AArch64::BRKPB_PPzPP:
2594 return AArch64::BRKPBS_PPzPP;
2595 case AArch64::BRKN_PPzP:
2596 return AArch64::BRKNS_PPzP;
2597 case AArch64::RDFFR_PPz:
2598 return AArch64::RDFFRS_PPz;
2599 case AArch64::PTRUE_B:
2600 return AArch64::PTRUES_B;
2601 }
2602}
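// Example: mapping AArch64::ADDWri to AArch64::ADDSWri turns
//   add w0, w1, #1
// into
//   adds w0, w1, #1
// which performs the same addition but additionally defines NZCV.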
2603
2604// Is this a candidate for ld/st merging or pairing? For example, we don't
2605// touch volatiles or load/stores that have a hint to avoid pair formation.
2606bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2607
2608 bool IsPreLdSt = isPreLdSt(MI);
2609
2610 // If this is a volatile load/store, don't mess with it.
2611 if (MI.hasOrderedMemoryRef())
2612 return false;
2613
2614 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2615 // For Pre-inc LD/ST, the operand is shifted by one.
2616 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2617 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2618 "Expected a reg or frame index operand.");
2619
2620 // For Pre-indexed addressing quadword instructions, the third operand is the
2621 // immediate value.
2622 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2623
2624 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2625 return false;
2626
2627 // Can't merge/pair if the instruction modifies the base register.
2628 // e.g., ldr x0, [x0]
2629 // This case will never occur with an FI base.
2630 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2631 // STR<S,D,Q,W,X>pre, it can be merged.
2632 // For example:
2633 // ldr q0, [x11, #32]!
2634 // ldr q1, [x11, #16]
2635 // to
2636 // ldp q0, q1, [x11, #32]!
2637 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2638 Register BaseReg = MI.getOperand(1).getReg();
2639 const TargetRegisterInfo *TRI = &getRegisterInfo();
2640 if (MI.modifiesRegister(BaseReg, TRI))
2641 return false;
2642 }
2643
2644 // Check if this load/store has a hint to avoid pair formation.
2645 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2646 if (isLdStPairSuppressed(MI))
2647 return false;
2648
2649 // Do not pair any callee-save store/reload instructions in the
2650 // prologue/epilogue if the CFI information encoded the operations as separate
2651 // instructions, as that will cause the size of the actual prologue to mismatch
2652 // with the prologue size recorded in the Windows CFI.
2653 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2654 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2655 MI.getMF()->getFunction().needsUnwindTableEntry();
2656 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2657 MI.getFlag(MachineInstr::FrameDestroy)))
2658 return false;
2659
2660 // On some CPUs quad load/store pairs are slower than two single load/stores.
2661 if (Subtarget.isPaired128Slow()) {
2662 switch (MI.getOpcode()) {
2663 default:
2664 break;
2665 case AArch64::LDURQi:
2666 case AArch64::STURQi:
2667 case AArch64::LDRQui:
2668 case AArch64::STRQui:
2669 return false;
2670 }
2671 }
2672
2673 return true;
2674}
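// When this returns true the load/store optimizer may later combine two
// adjacent accesses into one paired access, e.g. (illustrative):
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// ->
//   ldp x0, x1, [x2]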
2675
2676bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2677 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2678 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2679 const TargetRegisterInfo *TRI) const {
2680 if (!LdSt.mayLoadOrStore())
2681 return false;
2682
2683 const MachineOperand *BaseOp;
2684 TypeSize WidthN(0, false);
2685 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2686 WidthN, TRI))
2687 return false;
2688 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2689 // vector.
2690 Width = LocationSize::precise(WidthN);
2691 BaseOps.push_back(BaseOp);
2692 return true;
2693}
2694
2695std::optional<ExtAddrMode>
2696AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2697 const TargetRegisterInfo *TRI) const {
2698 const MachineOperand *Base; // Filled with the base operand of MI.
2699 int64_t Offset; // Filled with the offset of MI.
2700 bool OffsetIsScalable;
2701 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2702 return std::nullopt;
2703
2704 if (!Base->isReg())
2705 return std::nullopt;
2706 ExtAddrMode AM;
2707 AM.BaseReg = Base->getReg();
2708 AM.Displacement = Offset;
2709 AM.ScaledReg = 0;
2710 AM.Scale = 0;
2711 return AM;
2712}
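// For instance, for 'ldr x1, [x0, #16]' (LDRXui with immediate 2 and scale 8)
// the returned ExtAddrMode is BaseReg = x0, Displacement = 16, ScaledReg = 0,
// Scale = 0.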
2713
2714bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2715 Register Reg,
2716 const MachineInstr &AddrI,
2717 ExtAddrMode &AM) const {
2718 // Filter out instructions into which we cannot fold.
2719 unsigned NumBytes;
2720 int64_t OffsetScale = 1;
2721 switch (MemI.getOpcode()) {
2722 default:
2723 return false;
2724
2725 case AArch64::LDURQi:
2726 case AArch64::STURQi:
2727 NumBytes = 16;
2728 break;
2729
2730 case AArch64::LDURDi:
2731 case AArch64::STURDi:
2732 case AArch64::LDURXi:
2733 case AArch64::STURXi:
2734 NumBytes = 8;
2735 break;
2736
2737 case AArch64::LDURWi:
2738 case AArch64::LDURSWi:
2739 case AArch64::STURWi:
2740 NumBytes = 4;
2741 break;
2742
2743 case AArch64::LDURHi:
2744 case AArch64::STURHi:
2745 case AArch64::LDURHHi:
2746 case AArch64::STURHHi:
2747 case AArch64::LDURSHXi:
2748 case AArch64::LDURSHWi:
2749 NumBytes = 2;
2750 break;
2751
2752 case AArch64::LDRBroX:
2753 case AArch64::LDRBBroX:
2754 case AArch64::LDRSBXroX:
2755 case AArch64::LDRSBWroX:
2756 case AArch64::STRBroX:
2757 case AArch64::STRBBroX:
2758 case AArch64::LDURBi:
2759 case AArch64::LDURBBi:
2760 case AArch64::LDURSBXi:
2761 case AArch64::LDURSBWi:
2762 case AArch64::STURBi:
2763 case AArch64::STURBBi:
2764 case AArch64::LDRBui:
2765 case AArch64::LDRBBui:
2766 case AArch64::LDRSBXui:
2767 case AArch64::LDRSBWui:
2768 case AArch64::STRBui:
2769 case AArch64::STRBBui:
2770 NumBytes = 1;
2771 break;
2772
2773 case AArch64::LDRQroX:
2774 case AArch64::STRQroX:
2775 case AArch64::LDRQui:
2776 case AArch64::STRQui:
2777 NumBytes = 16;
2778 OffsetScale = 16;
2779 break;
2780
2781 case AArch64::LDRDroX:
2782 case AArch64::STRDroX:
2783 case AArch64::LDRXroX:
2784 case AArch64::STRXroX:
2785 case AArch64::LDRDui:
2786 case AArch64::STRDui:
2787 case AArch64::LDRXui:
2788 case AArch64::STRXui:
2789 NumBytes = 8;
2790 OffsetScale = 8;
2791 break;
2792
2793 case AArch64::LDRWroX:
2794 case AArch64::LDRSWroX:
2795 case AArch64::STRWroX:
2796 case AArch64::LDRWui:
2797 case AArch64::LDRSWui:
2798 case AArch64::STRWui:
2799 NumBytes = 4;
2800 OffsetScale = 4;
2801 break;
2802
2803 case AArch64::LDRHroX:
2804 case AArch64::STRHroX:
2805 case AArch64::LDRHHroX:
2806 case AArch64::STRHHroX:
2807 case AArch64::LDRSHXroX:
2808 case AArch64::LDRSHWroX:
2809 case AArch64::LDRHui:
2810 case AArch64::STRHui:
2811 case AArch64::LDRHHui:
2812 case AArch64::STRHHui:
2813 case AArch64::LDRSHXui:
2814 case AArch64::LDRSHWui:
2815 NumBytes = 2;
2816 OffsetScale = 2;
2817 break;
2818 }
2819
2820 // Check the fold operand is not the loaded/stored value.
2821 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2822 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2823 return false;
2824
2825 // Handle memory instructions with a [Reg, Reg] addressing mode.
2826 if (MemI.getOperand(2).isReg()) {
2827 // Bail if the addressing mode already includes extension of the offset
2828 // register.
2829 if (MemI.getOperand(3).getImm())
2830 return false;
2831
2832 // Check if we actually have a scaled offset.
2833 if (MemI.getOperand(4).getImm() == 0)
2834 OffsetScale = 1;
2835
2836 // If the address instruction is folded into the base register, then the
2837 // addressing mode must not have a scale. Then we can swap the base and the
2838 // scaled registers.
2839 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2840 return false;
2841
2842 switch (AddrI.getOpcode()) {
2843 default:
2844 return false;
2845
2846 case AArch64::SBFMXri:
2847 // sxtw Xa, Wm
2848 // ldr Xd, [Xn, Xa, lsl #N]
2849 // ->
2850 // ldr Xd, [Xn, Wm, sxtw #N]
2851 if (AddrI.getOperand(2).getImm() != 0 ||
2852 AddrI.getOperand(3).getImm() != 31)
2853 return false;
2854
2855 AM.BaseReg = MemI.getOperand(1).getReg();
2856 if (AM.BaseReg == Reg)
2857 AM.BaseReg = MemI.getOperand(2).getReg();
2858 AM.ScaledReg = AddrI.getOperand(1).getReg();
2859 AM.Scale = OffsetScale;
2860 AM.Displacement = 0;
2861 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2862 return true;
2863
2864 case TargetOpcode::SUBREG_TO_REG: {
2865 // mov Wa, Wm
2866 // ldr Xd, [Xn, Xa, lsl #N]
2867 // ->
2868 // ldr Xd, [Xn, Wm, uxtw #N]
2869
2870 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2871 if (AddrI.getOperand(1).getImm() != 0 ||
2872 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2873 return false;
2874
2875 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2876 Register OffsetReg = AddrI.getOperand(2).getReg();
2877 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2878 return false;
2879
2880 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2881 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2882 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2883 DefMI.getOperand(3).getImm() != 0)
2884 return false;
2885
2886 AM.BaseReg = MemI.getOperand(1).getReg();
2887 if (AM.BaseReg == Reg)
2888 AM.BaseReg = MemI.getOperand(2).getReg();
2889 AM.ScaledReg = DefMI.getOperand(2).getReg();
2890 AM.Scale = OffsetScale;
2891 AM.Displacement = 0;
2892 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2893 return true;
2894 }
2895 }
2896 }
2897
2898 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2899
2900 // Check we are not breaking a potential conversion to an LDP.
2901 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2902 int64_t NewOffset) -> bool {
2903 int64_t MinOffset, MaxOffset;
2904 switch (NumBytes) {
2905 default:
2906 return true;
2907 case 4:
2908 MinOffset = -256;
2909 MaxOffset = 252;
2910 break;
2911 case 8:
2912 MinOffset = -512;
2913 MaxOffset = 504;
2914 break;
2915 case 16:
2916 MinOffset = -1024;
2917 MaxOffset = 1008;
2918 break;
2919 }
2920 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2921 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2922 };
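// E.g. for an 8-byte access an old offset of 64 lies inside the LDP range
// [-512, 504], so a fold that would move it to, say, 1024 is rejected to keep
// a later LDP formation possible; offsets already outside the range cannot be
// made any worse by the fold and are accepted.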
2923 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2924 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2925 int64_t NewOffset = OldOffset + Disp;
2926 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2927 return false;
2928 // If the old offset would fit into an LDP, but the new offset wouldn't,
2929 // bail out.
2930 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2931 return false;
2932 AM.BaseReg = AddrI.getOperand(1).getReg();
2933 AM.ScaledReg = 0;
2934 AM.Scale = 0;
2935 AM.Displacement = NewOffset;
2936 AM.Form = ExtAddrMode::Formula::Basic;
2937 return true;
2938 };
2939
2940 auto canFoldAddRegIntoAddrMode =
2941 [&](int64_t Scale,
2942 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2943 if (MemI.getOperand(2).getImm() != 0)
2944 return false;
2945 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2946 return false;
2947 AM.BaseReg = AddrI.getOperand(1).getReg();
2948 AM.ScaledReg = AddrI.getOperand(2).getReg();
2949 AM.Scale = Scale;
2950 AM.Displacement = 0;
2951 AM.Form = Form;
2952 return true;
2953 };
2954
2955 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2956 unsigned Opcode = MemI.getOpcode();
2957 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2958 Subtarget.isSTRQroSlow();
2959 };
2960
2961 int64_t Disp = 0;
2962 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2963 switch (AddrI.getOpcode()) {
2964 default:
2965 return false;
2966
2967 case AArch64::ADDXri:
2968 // add Xa, Xn, #N
2969 // ldr Xd, [Xa, #M]
2970 // ->
2971 // ldr Xd, [Xn, #N'+M]
2972 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2973 return canFoldAddSubImmIntoAddrMode(Disp);
2974
2975 case AArch64::SUBXri:
2976 // sub Xa, Xn, #N
2977 // ldr Xd, [Xa, #M]
2978 // ->
2979 // ldr Xd, [Xn, #N'+M]
2980 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2981 return canFoldAddSubImmIntoAddrMode(-Disp);
2982
2983 case AArch64::ADDXrs: {
2984 // add Xa, Xn, Xm, lsl #N
2985 // ldr Xd, [Xa]
2986 // ->
2987 // ldr Xd, [Xn, Xm, lsl #N]
2988
2989 // Don't fold the add if the result would be slower, unless optimising for
2990 // size.
2991 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2992 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
2993 return false;
2994 Shift = AArch64_AM::getShiftValue(Shift);
2995 if (!OptSize) {
2996 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
2997 return false;
2998 if (avoidSlowSTRQ(MemI))
2999 return false;
3000 }
3001 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3002 }
3003
3004 case AArch64::ADDXrr:
3005 // add Xa, Xn, Xm
3006 // ldr Xd, [Xa]
3007 // ->
3008 // ldr Xd, [Xn, Xm, lsl #0]
3009
3010 // Don't fold the add if the result would be slower, unless optimising for
3011 // size.
3012 if (!OptSize && avoidSlowSTRQ(MemI))
3013 return false;
3014 return canFoldAddRegIntoAddrMode(1);
3015
3016 case AArch64::ADDXrx:
3017 // add Xa, Xn, Wm, {s,u}xtw #N
3018 // ldr Xd, [Xa]
3019 // ->
3020 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3021
3022 // Don't fold the add if the result would be slower, unless optimising for
3023 // size.
3024 if (!OptSize && avoidSlowSTRQ(MemI))
3025 return false;
3026
3027 // Can fold only sign-/zero-extend of a word.
3028 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3029 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3030 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3031 return false;
3032
3033 return canFoldAddRegIntoAddrMode(
3034 1ULL << AArch64_AM::getArithShiftValue(Imm),
3035 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3036 : ExtAddrMode::Formula::ZExtScaledReg);
3037 }
3038}
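// A concrete example of the immediate case (illustrative values):
//   add x8, x0, #16
//   ldr x9, [x8]
// is foldable into AM = { BaseReg = x0, Displacement = 16, Scale = 0 },
// i.e. the equivalent of 'ldr x9, [x0, #16]'.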
3039
3040// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3041// return the opcode of an instruction performing the same operation, but using
3042// the [Reg, Reg] addressing mode.
3043static unsigned regOffsetOpcode(unsigned Opcode) {
3044 switch (Opcode) {
3045 default:
3046 llvm_unreachable("Address folding not implemented for instruction");
3047
3048 case AArch64::LDURQi:
3049 case AArch64::LDRQui:
3050 return AArch64::LDRQroX;
3051 case AArch64::STURQi:
3052 case AArch64::STRQui:
3053 return AArch64::STRQroX;
3054 case AArch64::LDURDi:
3055 case AArch64::LDRDui:
3056 return AArch64::LDRDroX;
3057 case AArch64::STURDi:
3058 case AArch64::STRDui:
3059 return AArch64::STRDroX;
3060 case AArch64::LDURXi:
3061 case AArch64::LDRXui:
3062 return AArch64::LDRXroX;
3063 case AArch64::STURXi:
3064 case AArch64::STRXui:
3065 return AArch64::STRXroX;
3066 case AArch64::LDURWi:
3067 case AArch64::LDRWui:
3068 return AArch64::LDRWroX;
3069 case AArch64::LDURSWi:
3070 case AArch64::LDRSWui:
3071 return AArch64::LDRSWroX;
3072 case AArch64::STURWi:
3073 case AArch64::STRWui:
3074 return AArch64::STRWroX;
3075 case AArch64::LDURHi:
3076 case AArch64::LDRHui:
3077 return AArch64::LDRHroX;
3078 case AArch64::STURHi:
3079 case AArch64::STRHui:
3080 return AArch64::STRHroX;
3081 case AArch64::LDURHHi:
3082 case AArch64::LDRHHui:
3083 return AArch64::LDRHHroX;
3084 case AArch64::STURHHi:
3085 case AArch64::STRHHui:
3086 return AArch64::STRHHroX;
3087 case AArch64::LDURSHXi:
3088 case AArch64::LDRSHXui:
3089 return AArch64::LDRSHXroX;
3090 case AArch64::LDURSHWi:
3091 case AArch64::LDRSHWui:
3092 return AArch64::LDRSHWroX;
3093 case AArch64::LDURBi:
3094 case AArch64::LDRBui:
3095 return AArch64::LDRBroX;
3096 case AArch64::LDURBBi:
3097 case AArch64::LDRBBui:
3098 return AArch64::LDRBBroX;
3099 case AArch64::LDURSBXi:
3100 case AArch64::LDRSBXui:
3101 return AArch64::LDRSBXroX;
3102 case AArch64::LDURSBWi:
3103 case AArch64::LDRSBWui:
3104 return AArch64::LDRSBWroX;
3105 case AArch64::STURBi:
3106 case AArch64::STRBui:
3107 return AArch64::STRBroX;
3108 case AArch64::STURBBi:
3109 case AArch64::STRBBui:
3110 return AArch64::STRBBroX;
3111 }
3112}
3113
3114// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3115// the opcode of an instruction performing the same operation, but using the
3116// [Reg, #Imm] addressing mode with scaled offset.
3117unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3118 switch (Opcode) {
3119 default:
3120 llvm_unreachable("Address folding not implemented for instruction");
3121
3122 case AArch64::LDURQi:
3123 Scale = 16;
3124 return AArch64::LDRQui;
3125 case AArch64::STURQi:
3126 Scale = 16;
3127 return AArch64::STRQui;
3128 case AArch64::LDURDi:
3129 Scale = 8;
3130 return AArch64::LDRDui;
3131 case AArch64::STURDi:
3132 Scale = 8;
3133 return AArch64::STRDui;
3134 case AArch64::LDURXi:
3135 Scale = 8;
3136 return AArch64::LDRXui;
3137 case AArch64::STURXi:
3138 Scale = 8;
3139 return AArch64::STRXui;
3140 case AArch64::LDURWi:
3141 Scale = 4;
3142 return AArch64::LDRWui;
3143 case AArch64::LDURSWi:
3144 Scale = 4;
3145 return AArch64::LDRSWui;
3146 case AArch64::STURWi:
3147 Scale = 4;
3148 return AArch64::STRWui;
3149 case AArch64::LDURHi:
3150 Scale = 2;
3151 return AArch64::LDRHui;
3152 case AArch64::STURHi:
3153 Scale = 2;
3154 return AArch64::STRHui;
3155 case AArch64::LDURHHi:
3156 Scale = 2;
3157 return AArch64::LDRHHui;
3158 case AArch64::STURHHi:
3159 Scale = 2;
3160 return AArch64::STRHHui;
3161 case AArch64::LDURSHXi:
3162 Scale = 2;
3163 return AArch64::LDRSHXui;
3164 case AArch64::LDURSHWi:
3165 Scale = 2;
3166 return AArch64::LDRSHWui;
3167 case AArch64::LDURBi:
3168 Scale = 1;
3169 return AArch64::LDRBui;
3170 case AArch64::LDURBBi:
3171 Scale = 1;
3172 return AArch64::LDRBBui;
3173 case AArch64::LDURSBXi:
3174 Scale = 1;
3175 return AArch64::LDRSBXui;
3176 case AArch64::LDURSBWi:
3177 Scale = 1;
3178 return AArch64::LDRSBWui;
3179 case AArch64::STURBi:
3180 Scale = 1;
3181 return AArch64::STRBui;
3182 case AArch64::STURBBi:
3183 Scale = 1;
3184 return AArch64::STRBBui;
3185 case AArch64::LDRQui:
3186 case AArch64::STRQui:
3187 Scale = 16;
3188 return Opcode;
3189 case AArch64::LDRDui:
3190 case AArch64::STRDui:
3191 case AArch64::LDRXui:
3192 case AArch64::STRXui:
3193 Scale = 8;
3194 return Opcode;
3195 case AArch64::LDRWui:
3196 case AArch64::LDRSWui:
3197 case AArch64::STRWui:
3198 Scale = 4;
3199 return Opcode;
3200 case AArch64::LDRHui:
3201 case AArch64::STRHui:
3202 case AArch64::LDRHHui:
3203 case AArch64::STRHHui:
3204 case AArch64::LDRSHXui:
3205 case AArch64::LDRSHWui:
3206 Scale = 2;
3207 return Opcode;
3208 case AArch64::LDRBui:
3209 case AArch64::LDRBBui:
3210 case AArch64::LDRSBXui:
3211 case AArch64::LDRSBWui:
3212 case AArch64::STRBui:
3213 case AArch64::STRBBui:
3214 Scale = 1;
3215 return Opcode;
3216 }
3217}
3218
3219// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3220// the opcode of an instruction performing the same operation, but using the
3221// [Reg, #Imm] addressing mode with unscaled offset.
3222unsigned unscaledOffsetOpcode(unsigned Opcode) {
3223 switch (Opcode) {
3224 default:
3225 llvm_unreachable("Address folding not implemented for instruction");
3226
3227 case AArch64::LDURQi:
3228 case AArch64::STURQi:
3229 case AArch64::LDURDi:
3230 case AArch64::STURDi:
3231 case AArch64::LDURXi:
3232 case AArch64::STURXi:
3233 case AArch64::LDURWi:
3234 case AArch64::LDURSWi:
3235 case AArch64::STURWi:
3236 case AArch64::LDURHi:
3237 case AArch64::STURHi:
3238 case AArch64::LDURHHi:
3239 case AArch64::STURHHi:
3240 case AArch64::LDURSHXi:
3241 case AArch64::LDURSHWi:
3242 case AArch64::LDURBi:
3243 case AArch64::STURBi:
3244 case AArch64::LDURBBi:
3245 case AArch64::STURBBi:
3246 case AArch64::LDURSBWi:
3247 case AArch64::LDURSBXi:
3248 return Opcode;
3249 case AArch64::LDRQui:
3250 return AArch64::LDURQi;
3251 case AArch64::STRQui:
3252 return AArch64::STURQi;
3253 case AArch64::LDRDui:
3254 return AArch64::LDURDi;
3255 case AArch64::STRDui:
3256 return AArch64::STURDi;
3257 case AArch64::LDRXui:
3258 return AArch64::LDURXi;
3259 case AArch64::STRXui:
3260 return AArch64::STURXi;
3261 case AArch64::LDRWui:
3262 return AArch64::LDURWi;
3263 case AArch64::LDRSWui:
3264 return AArch64::LDURSWi;
3265 case AArch64::STRWui:
3266 return AArch64::STURWi;
3267 case AArch64::LDRHui:
3268 return AArch64::LDURHi;
3269 case AArch64::STRHui:
3270 return AArch64::STURHi;
3271 case AArch64::LDRHHui:
3272 return AArch64::LDURHHi;
3273 case AArch64::STRHHui:
3274 return AArch64::STURHHi;
3275 case AArch64::LDRSHXui:
3276 return AArch64::LDURSHXi;
3277 case AArch64::LDRSHWui:
3278 return AArch64::LDURSHWi;
3279 case AArch64::LDRBBui:
3280 return AArch64::LDURBBi;
3281 case AArch64::LDRBui:
3282 return AArch64::LDURBi;
3283 case AArch64::STRBBui:
3284 return AArch64::STURBBi;
3285 case AArch64::STRBui:
3286 return AArch64::STURBi;
3287 case AArch64::LDRSBWui:
3288 return AArch64::LDURSBWi;
3289 case AArch64::LDRSBXui:
3290 return AArch64::LDURSBXi;
3291 }
3292}
3293
3294// Given the opcode of a memory load/store instruction, return the opcode of an
3295// instruction performing the same operation, but using
3296// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3297// offset register.
3298static unsigned offsetExtendOpcode(unsigned Opcode) {
3299 switch (Opcode) {
3300 default:
3301 llvm_unreachable("Address folding not implemented for instruction");
3302
3303 case AArch64::LDRQroX:
3304 case AArch64::LDURQi:
3305 case AArch64::LDRQui:
3306 return AArch64::LDRQroW;
3307 case AArch64::STRQroX:
3308 case AArch64::STURQi:
3309 case AArch64::STRQui:
3310 return AArch64::STRQroW;
3311 case AArch64::LDRDroX:
3312 case AArch64::LDURDi:
3313 case AArch64::LDRDui:
3314 return AArch64::LDRDroW;
3315 case AArch64::STRDroX:
3316 case AArch64::STURDi:
3317 case AArch64::STRDui:
3318 return AArch64::STRDroW;
3319 case AArch64::LDRXroX:
3320 case AArch64::LDURXi:
3321 case AArch64::LDRXui:
3322 return AArch64::LDRXroW;
3323 case AArch64::STRXroX:
3324 case AArch64::STURXi:
3325 case AArch64::STRXui:
3326 return AArch64::STRXroW;
3327 case AArch64::LDRWroX:
3328 case AArch64::LDURWi:
3329 case AArch64::LDRWui:
3330 return AArch64::LDRWroW;
3331 case AArch64::LDRSWroX:
3332 case AArch64::LDURSWi:
3333 case AArch64::LDRSWui:
3334 return AArch64::LDRSWroW;
3335 case AArch64::STRWroX:
3336 case AArch64::STURWi:
3337 case AArch64::STRWui:
3338 return AArch64::STRWroW;
3339 case AArch64::LDRHroX:
3340 case AArch64::LDURHi:
3341 case AArch64::LDRHui:
3342 return AArch64::LDRHroW;
3343 case AArch64::STRHroX:
3344 case AArch64::STURHi:
3345 case AArch64::STRHui:
3346 return AArch64::STRHroW;
3347 case AArch64::LDRHHroX:
3348 case AArch64::LDURHHi:
3349 case AArch64::LDRHHui:
3350 return AArch64::LDRHHroW;
3351 case AArch64::STRHHroX:
3352 case AArch64::STURHHi:
3353 case AArch64::STRHHui:
3354 return AArch64::STRHHroW;
3355 case AArch64::LDRSHXroX:
3356 case AArch64::LDURSHXi:
3357 case AArch64::LDRSHXui:
3358 return AArch64::LDRSHXroW;
3359 case AArch64::LDRSHWroX:
3360 case AArch64::LDURSHWi:
3361 case AArch64::LDRSHWui:
3362 return AArch64::LDRSHWroW;
3363 case AArch64::LDRBroX:
3364 case AArch64::LDURBi:
3365 case AArch64::LDRBui:
3366 return AArch64::LDRBroW;
3367 case AArch64::LDRBBroX:
3368 case AArch64::LDURBBi:
3369 case AArch64::LDRBBui:
3370 return AArch64::LDRBBroW;
3371 case AArch64::LDRSBXroX:
3372 case AArch64::LDURSBXi:
3373 case AArch64::LDRSBXui:
3374 return AArch64::LDRSBXroW;
3375 case AArch64::LDRSBWroX:
3376 case AArch64::LDURSBWi:
3377 case AArch64::LDRSBWui:
3378 return AArch64::LDRSBWroW;
3379 case AArch64::STRBroX:
3380 case AArch64::STURBi:
3381 case AArch64::STRBui:
3382 return AArch64::STRBroW;
3383 case AArch64::STRBBroX:
3384 case AArch64::STURBBi:
3385 case AArch64::STRBBui:
3386 return AArch64::STRBBroW;
3387 }
3388}
3389
3390MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3391 const ExtAddrMode &AM) const {
3392
3393 const DebugLoc &DL = MemI.getDebugLoc();
3394 MachineBasicBlock &MBB = *MemI.getParent();
3395 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3396
3397 if (AM.Form == ExtAddrMode::Formula::Basic) {
3398 if (AM.ScaledReg) {
3399 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3400 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3401 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3402 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3403 .addReg(MemI.getOperand(0).getReg(),
3404 MemI.mayLoad() ? RegState::Define : 0)
3405 .addReg(AM.BaseReg)
3406 .addReg(AM.ScaledReg)
3407 .addImm(0)
3408 .addImm(AM.Scale > 1)
3409 .setMemRefs(MemI.memoperands())
3410 .setMIFlags(MemI.getFlags());
3411 return B.getInstr();
3412 }
3413
3414 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3415 "Addressing mode not supported for folding");
3416
3417 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3418 unsigned Scale = 1;
3419 unsigned Opcode = MemI.getOpcode();
3420 if (isInt<9>(AM.Displacement))
3421 Opcode = unscaledOffsetOpcode(Opcode);
3422 else
3423 Opcode = scaledOffsetOpcode(Opcode, Scale);
3424
3425 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3426 .addReg(MemI.getOperand(0).getReg(),
3427 MemI.mayLoad() ? RegState::Define : 0)
3428 .addReg(AM.BaseReg)
3429 .addImm(AM.Displacement / Scale)
3430 .setMemRefs(MemI.memoperands())
3431 .setMIFlags(MemI.getFlags());
3432 return B.getInstr();
3433 }
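// For example (illustrative), with AM = { BaseReg = x0, Displacement = 16 }
// a 64-bit load is re-emitted as 'ldur x9, [x0, #16]' (the displacement fits
// a signed 9-bit immediate), while Displacement = 1024 selects the scaled
// LDRXui form 'ldr x9, [x0, #1024]'; the register-offset case above instead
// produces e.g. 'ldr x9, [x0, x2, lsl #3]' for Scale = 8.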
3434
3435 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3436 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3437 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3438 assert(AM.ScaledReg && !AM.Displacement &&
3439 "Address offset can be a register or an immediate, but not both");
3440 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3441 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3442 // Make sure the offset register is in the correct register class.
3443 Register OffsetReg = AM.ScaledReg;
3444 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3445 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3446 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3447 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3448 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3449 }
3450 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3451 .addReg(MemI.getOperand(0).getReg(),
3452 MemI.mayLoad() ? RegState::Define : 0)
3453 .addReg(AM.BaseReg)
3454 .addReg(OffsetReg)
3455 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3456 .addImm(AM.Scale != 1)
3457 .setMemRefs(MemI.memoperands())
3458 .setMIFlags(MemI.getFlags());
3459
3460 return B.getInstr();
3461 }
3462
3464 "Function must not be called with an addressing mode it can't handle");
3465}
3466
3467bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3468 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3469 bool &OffsetIsScalable, TypeSize &Width,
3470 const TargetRegisterInfo *TRI) const {
3471 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3472 // Handle only loads/stores with base register followed by immediate offset.
3473 if (LdSt.getNumExplicitOperands() == 3) {
3474 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3475 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3476 !LdSt.getOperand(2).isImm())
3477 return false;
3478 } else if (LdSt.getNumExplicitOperands() == 4) {
3479 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3480 if (!LdSt.getOperand(1).isReg() ||
3481 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3482 !LdSt.getOperand(3).isImm())
3483 return false;
3484 } else
3485 return false;
3486
3487 // Get the scaling factor for the instruction and set the width for the
3488 // instruction.
3489 TypeSize Scale(0U, false);
3490 int64_t Dummy1, Dummy2;
3491
3492 // If this returns false, then it's an instruction we don't want to handle.
3493 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3494 return false;
3495
3496 // Compute the offset. Offset is calculated as the immediate operand
3497 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3498 // set to 1.
3499 if (LdSt.getNumExplicitOperands() == 3) {
3500 BaseOp = &LdSt.getOperand(1);
3501 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3502 } else {
3503 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3504 BaseOp = &LdSt.getOperand(2);
3505 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3506 }
3507 OffsetIsScalable = Scale.isScalable();
3508
3509 if (!BaseOp->isReg() && !BaseOp->isFI())
3510 return false;
3511
3512 return true;
3513}
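// Example: for 'ldr x1, [x0, #8]' (LDRXui with immediate operand 1 and scale
// 8) this sets BaseOp to the x0 operand, Offset to 8 and OffsetIsScalable to
// false; for scalable SVE fills/spills such as LDR_ZXI the scale is
// vscale-dependent (16 bytes times vscale) and OffsetIsScalable is true.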
3514
3515MachineOperand &
3516AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3517 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3518 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3519 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3520 return OfsOp;
3521}
3522
3523bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3524 TypeSize &Width, int64_t &MinOffset,
3525 int64_t &MaxOffset) {
3526 switch (Opcode) {
3527 // Not a memory operation, or not something we want to handle.
3528 default:
3529 Scale = TypeSize::getFixed(0);
3530 Width = TypeSize::getFixed(0);
3531 MinOffset = MaxOffset = 0;
3532 return false;
3533 case AArch64::STRWpost:
3534 case AArch64::LDRWpost:
3535 Width = TypeSize::getFixed(32);
3536 Scale = TypeSize::getFixed(4);
3537 MinOffset = -256;
3538 MaxOffset = 255;
3539 break;
3540 case AArch64::LDURQi:
3541 case AArch64::STURQi:
3542 Width = TypeSize::getFixed(16);
3543 Scale = TypeSize::getFixed(1);
3544 MinOffset = -256;
3545 MaxOffset = 255;
3546 break;
3547 case AArch64::PRFUMi:
3548 case AArch64::LDURXi:
3549 case AArch64::LDURDi:
3550 case AArch64::LDAPURXi:
3551 case AArch64::STURXi:
3552 case AArch64::STURDi:
3553 case AArch64::STLURXi:
3554 Width = TypeSize::getFixed(8);
3555 Scale = TypeSize::getFixed(1);
3556 MinOffset = -256;
3557 MaxOffset = 255;
3558 break;
3559 case AArch64::LDURWi:
3560 case AArch64::LDURSi:
3561 case AArch64::LDURSWi:
3562 case AArch64::LDAPURi:
3563 case AArch64::LDAPURSWi:
3564 case AArch64::STURWi:
3565 case AArch64::STURSi:
3566 case AArch64::STLURWi:
3567 Width = TypeSize::getFixed(4);
3568 Scale = TypeSize::getFixed(1);
3569 MinOffset = -256;
3570 MaxOffset = 255;
3571 break;
3572 case AArch64::LDURHi:
3573 case AArch64::LDURHHi:
3574 case AArch64::LDURSHXi:
3575 case AArch64::LDURSHWi:
3576 case AArch64::LDAPURHi:
3577 case AArch64::LDAPURSHWi:
3578 case AArch64::LDAPURSHXi:
3579 case AArch64::STURHi:
3580 case AArch64::STURHHi:
3581 case AArch64::STLURHi:
3582 Width = TypeSize::getFixed(2);
3583 Scale = TypeSize::getFixed(1);
3584 MinOffset = -256;
3585 MaxOffset = 255;
3586 break;
3587 case AArch64::LDURBi:
3588 case AArch64::LDURBBi:
3589 case AArch64::LDURSBXi:
3590 case AArch64::LDURSBWi:
3591 case AArch64::LDAPURBi:
3592 case AArch64::LDAPURSBWi:
3593 case AArch64::LDAPURSBXi:
3594 case AArch64::STURBi:
3595 case AArch64::STURBBi:
3596 case AArch64::STLURBi:
3597 Width = TypeSize::getFixed(1);
3598 Scale = TypeSize::getFixed(1);
3599 MinOffset = -256;
3600 MaxOffset = 255;
3601 break;
3602 case AArch64::LDPQi:
3603 case AArch64::LDNPQi:
3604 case AArch64::STPQi:
3605 case AArch64::STNPQi:
3606 Scale = TypeSize::getFixed(16);
3607 Width = TypeSize::getFixed(32);
3608 MinOffset = -64;
3609 MaxOffset = 63;
3610 break;
3611 case AArch64::LDRQui:
3612 case AArch64::STRQui:
3613 Scale = TypeSize::getFixed(16);
3614 Width = TypeSize::getFixed(16);
3615 MinOffset = 0;
3616 MaxOffset = 4095;
3617 break;
3618 case AArch64::LDPXi:
3619 case AArch64::LDPDi:
3620 case AArch64::LDNPXi:
3621 case AArch64::LDNPDi:
3622 case AArch64::STPXi:
3623 case AArch64::STPDi:
3624 case AArch64::STNPXi:
3625 case AArch64::STNPDi:
3626 Scale = TypeSize::getFixed(8);
3627 Width = TypeSize::getFixed(16);
3628 MinOffset = -64;
3629 MaxOffset = 63;
3630 break;
3631 case AArch64::PRFMui:
3632 case AArch64::LDRXui:
3633 case AArch64::LDRDui:
3634 case AArch64::STRXui:
3635 case AArch64::STRDui:
3636 Scale = TypeSize::getFixed(8);
3637 Width = TypeSize::getFixed(8);
3638 MinOffset = 0;
3639 MaxOffset = 4095;
3640 break;
3641 case AArch64::StoreSwiftAsyncContext:
3642 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3643 Scale = TypeSize::getFixed(1);
3644 Width = TypeSize::getFixed(8);
3645 MinOffset = 0;
3646 MaxOffset = 4095;
3647 break;
3648 case AArch64::LDPWi:
3649 case AArch64::LDPSi:
3650 case AArch64::LDNPWi:
3651 case AArch64::LDNPSi:
3652 case AArch64::STPWi:
3653 case AArch64::STPSi:
3654 case AArch64::STNPWi:
3655 case AArch64::STNPSi:
3656 Scale = TypeSize::getFixed(4);
3657 Width = TypeSize::getFixed(8);
3658 MinOffset = -64;
3659 MaxOffset = 63;
3660 break;
3661 case AArch64::LDRWui:
3662 case AArch64::LDRSui:
3663 case AArch64::LDRSWui:
3664 case AArch64::STRWui:
3665 case AArch64::STRSui:
3666 Scale = TypeSize::getFixed(4);
3667 Width = TypeSize::getFixed(4);
3668 MinOffset = 0;
3669 MaxOffset = 4095;
3670 break;
3671 case AArch64::LDRHui:
3672 case AArch64::LDRHHui:
3673 case AArch64::LDRSHWui:
3674 case AArch64::LDRSHXui:
3675 case AArch64::STRHui:
3676 case AArch64::STRHHui:
3677 Scale = TypeSize::getFixed(2);
3678 Width = TypeSize::getFixed(2);
3679 MinOffset = 0;
3680 MaxOffset = 4095;
3681 break;
3682 case AArch64::LDRBui:
3683 case AArch64::LDRBBui:
3684 case AArch64::LDRSBWui:
3685 case AArch64::LDRSBXui:
3686 case AArch64::STRBui:
3687 case AArch64::STRBBui:
3688 Scale = TypeSize::getFixed(1);
3689 Width = TypeSize::getFixed(1);
3690 MinOffset = 0;
3691 MaxOffset = 4095;
3692 break;
3693 case AArch64::STPXpre:
3694 case AArch64::LDPXpost:
3695 case AArch64::STPDpre:
3696 case AArch64::LDPDpost:
3697 Scale = TypeSize::getFixed(8);
3698 Width = TypeSize::getFixed(8);
3699 MinOffset = -512;
3700 MaxOffset = 504;
3701 break;
3702 case AArch64::STPQpre:
3703 case AArch64::LDPQpost:
3704 Scale = TypeSize::getFixed(16);
3705 Width = TypeSize::getFixed(16);
3706 MinOffset = -1024;
3707 MaxOffset = 1008;
3708 break;
3709 case AArch64::STRXpre:
3710 case AArch64::STRDpre:
3711 case AArch64::LDRXpost:
3712 case AArch64::LDRDpost:
3713 Scale = TypeSize::getFixed(1);
3714 Width = TypeSize::getFixed(8);
3715 MinOffset = -256;
3716 MaxOffset = 255;
3717 break;
3718 case AArch64::STRQpre:
3719 case AArch64::LDRQpost:
3720 Scale = TypeSize::getFixed(1);
3721 Width = TypeSize::getFixed(16);
3722 MinOffset = -256;
3723 MaxOffset = 255;
3724 break;
3725 case AArch64::ADDG:
3726 Scale = TypeSize::getFixed(16);
3727 Width = TypeSize::getFixed(0);
3728 MinOffset = 0;
3729 MaxOffset = 63;
3730 break;
3731 case AArch64::TAGPstack:
3732 Scale = TypeSize::getFixed(16);
3733 Width = TypeSize::getFixed(0);
3734 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3735 // of 63 (not 64!).
3736 MinOffset = -63;
3737 MaxOffset = 63;
3738 break;
3739 case AArch64::LDG:
3740 case AArch64::STGi:
3741 case AArch64::STZGi:
3742 Scale = TypeSize::getFixed(16);
3743 Width = TypeSize::getFixed(16);
3744 MinOffset = -256;
3745 MaxOffset = 255;
3746 break;
3747 case AArch64::STR_ZZZZXI:
3748 case AArch64::LDR_ZZZZXI:
3749 Scale = TypeSize::getScalable(16);
3750 Width = TypeSize::getScalable(16 * 4);
3751 MinOffset = -256;
3752 MaxOffset = 252;
3753 break;
3754 case AArch64::STR_ZZZXI:
3755 case AArch64::LDR_ZZZXI:
3756 Scale = TypeSize::getScalable(16);
3757 Width = TypeSize::getScalable(16 * 3);
3758 MinOffset = -256;
3759 MaxOffset = 253;
3760 break;
3761 case AArch64::STR_ZZXI:
3762 case AArch64::LDR_ZZXI:
3763 Scale = TypeSize::getScalable(16);
3764 Width = TypeSize::getScalable(16 * 2);
3765 MinOffset = -256;
3766 MaxOffset = 254;
3767 break;
3768 case AArch64::LDR_PXI:
3769 case AArch64::STR_PXI:
3770 Scale = TypeSize::getScalable(2);
3771 Width = TypeSize::getScalable(2);
3772 MinOffset = -256;
3773 MaxOffset = 255;
3774 break;
3775 case AArch64::LDR_PPXI:
3776 case AArch64::STR_PPXI:
3777 Scale = TypeSize::getScalable(2);
3778 Width = TypeSize::getScalable(2 * 2);
3779 MinOffset = -256;
3780 MaxOffset = 254;
3781 break;
3782 case AArch64::LDR_ZXI:
3783 case AArch64::STR_ZXI:
3784 Scale = TypeSize::getScalable(16);
3785 Width = TypeSize::getScalable(16);
3786 MinOffset = -256;
3787 MaxOffset = 255;
3788 break;
3789 case AArch64::LD1B_IMM:
3790 case AArch64::LD1H_IMM:
3791 case AArch64::LD1W_IMM:
3792 case AArch64::LD1D_IMM:
3793 case AArch64::LDNT1B_ZRI:
3794 case AArch64::LDNT1H_ZRI:
3795 case AArch64::LDNT1W_ZRI:
3796 case AArch64::LDNT1D_ZRI:
3797 case AArch64::ST1B_IMM:
3798 case AArch64::ST1H_IMM:
3799 case AArch64::ST1W_IMM:
3800 case AArch64::ST1D_IMM:
3801 case AArch64::STNT1B_ZRI:
3802 case AArch64::STNT1H_ZRI:
3803 case AArch64::STNT1W_ZRI:
3804 case AArch64::STNT1D_ZRI:
3805 case AArch64::LDNF1B_IMM:
3806 case AArch64::LDNF1H_IMM:
3807 case AArch64::LDNF1W_IMM:
3808 case AArch64::LDNF1D_IMM:
3809 // A full vector's worth of data
3810 // Width = mbytes * elements
3811 Scale = TypeSize::getScalable(16);
3812 Width = TypeSize::getScalable(16);
3813 MinOffset = -8;
3814 MaxOffset = 7;
3815 break;
3816 case AArch64::LD2B_IMM:
3817 case AArch64::LD2H_IMM:
3818 case AArch64::LD2W_IMM:
3819 case AArch64::LD2D_IMM:
3820 case AArch64::ST2B_IMM:
3821 case AArch64::ST2H_IMM:
3822 case AArch64::ST2W_IMM:
3823 case AArch64::ST2D_IMM:
3824 Scale = TypeSize::getScalable(32);
3825 Width = TypeSize::getScalable(16 * 2);
3826 MinOffset = -8;
3827 MaxOffset = 7;
3828 break;
3829 case AArch64::LD3B_IMM:
3830 case AArch64::LD3H_IMM:
3831 case AArch64::LD3W_IMM:
3832 case AArch64::LD3D_IMM:
3833 case AArch64::ST3B_IMM:
3834 case AArch64::ST3H_IMM:
3835 case AArch64::ST3W_IMM:
3836 case AArch64::ST3D_IMM:
3837 Scale = TypeSize::getScalable(48);
3838 Width = TypeSize::getScalable(16 * 3);
3839 MinOffset = -8;
3840 MaxOffset = 7;
3841 break;
3842 case AArch64::LD4B_IMM:
3843 case AArch64::LD4H_IMM:
3844 case AArch64::LD4W_IMM:
3845 case AArch64::LD4D_IMM:
3846 case AArch64::ST4B_IMM:
3847 case AArch64::ST4H_IMM:
3848 case AArch64::ST4W_IMM:
3849 case AArch64::ST4D_IMM:
3850 Scale = TypeSize::getScalable(64);
3851 Width = TypeSize::getScalable(16 * 4);
3852 MinOffset = -8;
3853 MaxOffset = 7;
3854 break;
3855 case AArch64::LD1B_H_IMM:
3856 case AArch64::LD1SB_H_IMM:
3857 case AArch64::LD1H_S_IMM:
3858 case AArch64::LD1SH_S_IMM:
3859 case AArch64::LD1W_D_IMM:
3860 case AArch64::LD1SW_D_IMM:
3861 case AArch64::ST1B_H_IMM:
3862 case AArch64::ST1H_S_IMM:
3863 case AArch64::ST1W_D_IMM:
3864 case AArch64::LDNF1B_H_IMM:
3865 case AArch64::LDNF1SB_H_IMM:
3866 case AArch64::LDNF1H_S_IMM:
3867 case AArch64::LDNF1SH_S_IMM:
3868 case AArch64::LDNF1W_D_IMM:
3869 case AArch64::LDNF1SW_D_IMM:
3870 // A half vector's worth of data
3871 // Width = mbytes * elements
3872 Scale = TypeSize::getScalable(8);
3873 Width = TypeSize::getScalable(8);
3874 MinOffset = -8;
3875 MaxOffset = 7;
3876 break;
3877 case AArch64::LD1B_S_IMM:
3878 case AArch64::LD1SB_S_IMM:
3879 case AArch64::LD1H_D_IMM:
3880 case AArch64::LD1SH_D_IMM:
3881 case AArch64::ST1B_S_IMM:
3882 case AArch64::ST1H_D_IMM:
3883 case AArch64::LDNF1B_S_IMM:
3884 case AArch64::LDNF1SB_S_IMM:
3885 case AArch64::LDNF1H_D_IMM:
3886 case AArch64::LDNF1SH_D_IMM:
3887 // A quarter vector's worth of data
3888 // Width = mbytes * elements
3889 Scale = TypeSize::getScalable(4);
3890 Width = TypeSize::getScalable(4);
3891 MinOffset = -8;
3892 MaxOffset = 7;
3893 break;
3894 case AArch64::LD1B_D_IMM:
3895 case AArch64::LD1SB_D_IMM:
3896 case AArch64::ST1B_D_IMM:
3897 case AArch64::LDNF1B_D_IMM:
3898 case AArch64::LDNF1SB_D_IMM:
3899 // An eighth vector's worth of data
3900 // Width = mbytes * elements
3901 Scale = TypeSize::getScalable(2);
3902 Width = TypeSize::getScalable(2);
3903 MinOffset = -8;
3904 MaxOffset = 7;
3905 break;
3906 case AArch64::ST2Gi:
3907 case AArch64::STZ2Gi:
3908 Scale = TypeSize::getFixed(16);
3909 Width = TypeSize::getFixed(32);
3910 MinOffset = -256;
3911 MaxOffset = 255;
3912 break;
3913 case AArch64::STGPi:
3914 Scale = TypeSize::getFixed(16);
3915 Width = TypeSize::getFixed(16);
3916 MinOffset = -64;
3917 MaxOffset = 63;
3918 break;
3919 case AArch64::LD1RB_IMM:
3920 case AArch64::LD1RB_H_IMM:
3921 case AArch64::LD1RB_S_IMM:
3922 case AArch64::LD1RB_D_IMM:
3923 case AArch64::LD1RSB_H_IMM:
3924 case AArch64::LD1RSB_S_IMM:
3925 case AArch64::LD1RSB_D_IMM:
3926 Scale = TypeSize::getFixed(1);
3927 Width = TypeSize::getFixed(1);
3928 MinOffset = 0;
3929 MaxOffset = 63;
3930 break;
3931 case AArch64::LD1RH_IMM:
3932 case AArch64::LD1RH_S_IMM:
3933 case AArch64::LD1RH_D_IMM:
3934 case AArch64::LD1RSH_S_IMM:
3935 case AArch64::LD1RSH_D_IMM:
3936 Scale = TypeSize::getFixed(2);
3937 Width = TypeSize::getFixed(2);
3938 MinOffset = 0;
3939 MaxOffset = 63;
3940 break;
3941 case AArch64::LD1RW_IMM:
3942 case AArch64::LD1RW_D_IMM:
3943 case AArch64::LD1RSW_IMM:
3944 Scale = TypeSize::getFixed(4);
3945 Width = TypeSize::getFixed(4);
3946 MinOffset = 0;
3947 MaxOffset = 63;
3948 break;
3949 case AArch64::LD1RD_IMM:
3950 Scale = TypeSize::getFixed(8);
3951 Width = TypeSize::getFixed(8);
3952 MinOffset = 0;
3953 MaxOffset = 63;
3954 break;
3955 }
3956
3957 return true;
3958}
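// Worked example for the table above: LDRXui/STRXui report Scale = Width = 8
// bytes with an immediate range of [0, 4095], so the scaled unsigned form can
// address bytes [0, 4095 * 8] = [0, 32760] from the base register, while the
// LDP/STP forms use a signed 7-bit element count ([-64, 63] 8-byte slots).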
3959
3960// Scaling factor for unscaled load or store.
3961int AArch64InstrInfo::getMemScale(unsigned Opc) {
3962 switch (Opc) {
3963 default:
3964 llvm_unreachable("Opcode has unknown scale!");
3965 case AArch64::LDRBBui:
3966 case AArch64::LDURBBi:
3967 case AArch64::LDRSBWui:
3968 case AArch64::LDURSBWi:
3969 case AArch64::STRBBui:
3970 case AArch64::STURBBi:
3971 return 1;
3972 case AArch64::LDRHHui:
3973 case AArch64::LDURHHi:
3974 case AArch64::LDRSHWui:
3975 case AArch64::LDURSHWi:
3976 case AArch64::STRHHui:
3977 case AArch64::STURHHi:
3978 return 2;
3979 case AArch64::LDRSui:
3980 case AArch64::LDURSi:
3981 case AArch64::LDRSpre:
3982 case AArch64::LDRSWui:
3983 case AArch64::LDURSWi:
3984 case AArch64::LDRSWpre:
3985 case AArch64::LDRWpre:
3986 case AArch64::LDRWui:
3987 case AArch64::LDURWi:
3988 case AArch64::STRSui:
3989 case AArch64::STURSi:
3990 case AArch64::STRSpre:
3991 case AArch64::STRWui:
3992 case AArch64::STURWi:
3993 case AArch64::STRWpre:
3994 case AArch64::LDPSi:
3995 case AArch64::LDPSWi:
3996 case AArch64::LDPWi:
3997 case AArch64::STPSi:
3998 case AArch64::STPWi:
3999 return 4;
4000 case AArch64::LDRDui:
4001 case AArch64::LDURDi:
4002 case AArch64::LDRDpre:
4003 case AArch64::LDRXui:
4004 case AArch64::LDURXi:
4005 case AArch64::LDRXpre:
4006 case AArch64::STRDui:
4007 case AArch64::STURDi:
4008 case AArch64::STRDpre:
4009 case AArch64::STRXui:
4010 case AArch64::STURXi:
4011 case AArch64::STRXpre:
4012 case AArch64::LDPDi:
4013 case AArch64::LDPXi:
4014 case AArch64::STPDi:
4015 case AArch64::STPXi:
4016 return 8;
4017 case AArch64::LDRQui:
4018 case AArch64::LDURQi:
4019 case AArch64::STRQui:
4020 case AArch64::STURQi:
4021 case AArch64::STRQpre:
4022 case AArch64::LDPQi:
4023 case AArch64::LDRQpre:
4024 case AArch64::STPQi:
4025 case AArch64::STGi:
4026 case AArch64::STZGi:
4027 case AArch64::ST2Gi:
4028 case AArch64::STZ2Gi:
4029 case AArch64::STGPi:
4030 return 16;
4031 }
4032}
4033
4034bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4035 switch (MI.getOpcode()) {
4036 default:
4037 return false;
4038 case AArch64::LDRWpre:
4039 case AArch64::LDRXpre:
4040 case AArch64::LDRSWpre:
4041 case AArch64::LDRSpre:
4042 case AArch64::LDRDpre:
4043 case AArch64::LDRQpre:
4044 return true;
4045 }
4046}
4047
4048bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4049 switch (MI.getOpcode()) {
4050 default:
4051 return false;
4052 case AArch64::STRWpre:
4053 case AArch64::STRXpre:
4054 case AArch64::STRSpre:
4055 case AArch64::STRDpre:
4056 case AArch64::STRQpre:
4057 return true;
4058 }
4059}
4060
4061bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4062 return isPreLd(MI) || isPreSt(MI);
4063}
4064
4065bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4066 switch (MI.getOpcode()) {
4067 default:
4068 return false;
4069 case AArch64::LDPSi:
4070 case AArch64::LDPSWi:
4071 case AArch64::LDPDi:
4072 case AArch64::LDPQi:
4073 case AArch64::LDPWi:
4074 case AArch64::LDPXi:
4075 case AArch64::STPSi:
4076 case AArch64::STPDi:
4077 case AArch64::STPQi:
4078 case AArch64::STPWi:
4079 case AArch64::STPXi:
4080 case AArch64::STGPi:
4081 return true;
4082 }
4083}
4084
4085const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4086 unsigned Idx =
4087 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4088 : 1;
4089 return MI.getOperand(Idx);
4090}
4091
4092const MachineOperand &
4093AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4094 unsigned Idx =
4095 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4096 : 2;
4097 return MI.getOperand(Idx);
4098}
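// Operand layout rationale: a plain load/store is (data, base, offset), so the
// base is operand 1 and the offset operand 2. Paired and pre/post-indexed
// forms carry one extra leading operand (the second data register or the
// write-back base def), shifting the base to operand 2 and the offset to 3.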
4099
4100static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4101 Register Reg) {
4102 if (MI.getParent() == nullptr)
4103 return nullptr;
4104 const MachineFunction *MF = MI.getParent()->getParent();
4105 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4106}
4107
4108bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4109 auto IsHFPR = [&](const MachineOperand &Op) {
4110 if (!Op.isReg())
4111 return false;
4112 auto Reg = Op.getReg();
4113 if (Reg.isPhysical())
4114 return AArch64::FPR16RegClass.contains(Reg);
4115 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4116 return TRC == &AArch64::FPR16RegClass ||
4117 TRC == &AArch64::FPR16_loRegClass;
4118 };
4119 return llvm::any_of(MI.operands(), IsHFPR);
4120}
4121
4122bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4123 auto IsQFPR = [&](const MachineOperand &Op) {
4124 if (!Op.isReg())
4125 return false;
4126 auto Reg = Op.getReg();
4127 if (Reg.isPhysical())
4128 return AArch64::FPR128RegClass.contains(Reg);
4129 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4130 return TRC == &AArch64::FPR128RegClass ||
4131 TRC == &AArch64::FPR128_loRegClass;
4132 };
4133 return llvm::any_of(MI.operands(), IsQFPR);
4134}
4135
4136bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4137 switch (MI.getOpcode()) {
4138 case AArch64::BRK:
4139 case AArch64::HLT:
4140 case AArch64::PACIASP:
4141 case AArch64::PACIBSP:
4142 // Implicit BTI behavior.
4143 return true;
4144 case AArch64::PAUTH_PROLOGUE:
4145 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4146 return true;
4147 case AArch64::HINT: {
4148 unsigned Imm = MI.getOperand(0).getImm();
4149 // Explicit BTI instruction.
4150 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4151 return true;
4152 // PACI(A|B)SP instructions.
4153 if (Imm == 25 || Imm == 27)
4154 return true;
4155 return false;
4156 }
4157 default:
4158 return false;
4159 }
4160}
4161
4162bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4163 auto IsFPR = [&](const MachineOperand &Op) {
4164 if (!Op.isReg())
4165 return false;
4166 auto Reg = Op.getReg();
4167 if (Reg.isPhysical())
4168 return AArch64::FPR128RegClass.contains(Reg) ||
4169 AArch64::FPR64RegClass.contains(Reg) ||
4170 AArch64::FPR32RegClass.contains(Reg) ||
4171 AArch64::FPR16RegClass.contains(Reg) ||
4172 AArch64::FPR8RegClass.contains(Reg);
4173
4174 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4175 return TRC == &AArch64::FPR128RegClass ||
4176 TRC == &AArch64::FPR128_loRegClass ||
4177 TRC == &AArch64::FPR64RegClass ||
4178 TRC == &AArch64::FPR64_loRegClass ||
4179 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4180 TRC == &AArch64::FPR8RegClass;
4181 };
4182 return llvm::any_of(MI.operands(), IsFPR);
4183}
4184
4185// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4186// scaled.
4187static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4188 int Scale = AArch64InstrInfo::getMemScale(Opc);
4189
4190 // If the byte-offset isn't a multiple of the stride, we can't scale this
4191 // offset.
4192 if (Offset % Scale != 0)
4193 return false;
4194
4195 // Convert the byte-offset used by unscaled into an "element" offset used
4196 // by the scaled pair load/store instructions.
4197 Offset /= Scale;
4198 return true;
4199}
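// Example: an unscaled STURXi at byte offset 24 has getMemScale() == 8, so
// scaleOffset() converts it to the element offset 24 / 8 == 3 that a scaled
// STRXui/STPXi would encode; a byte offset of 20 is rejected because it is
// not a multiple of the 8-byte stride.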
4200
4201static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4202 if (FirstOpc == SecondOpc)
4203 return true;
4204 // We can also pair sign-ext and zero-ext instructions.
4205 switch (FirstOpc) {
4206 default:
4207 return false;
4208 case AArch64::STRSui:
4209 case AArch64::STURSi:
4210 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4211 case AArch64::STRDui:
4212 case AArch64::STURDi:
4213 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4214 case AArch64::STRQui:
4215 case AArch64::STURQi:
4216 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4217 case AArch64::STRWui:
4218 case AArch64::STURWi:
4219 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4220 case AArch64::STRXui:
4221 case AArch64::STURXi:
4222 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4223 case AArch64::LDRSui:
4224 case AArch64::LDURSi:
4225 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4226 case AArch64::LDRDui:
4227 case AArch64::LDURDi:
4228 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4229 case AArch64::LDRQui:
4230 case AArch64::LDURQi:
4231 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4232 case AArch64::LDRWui:
4233 case AArch64::LDURWi:
4234 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4235 case AArch64::LDRSWui:
4236 case AArch64::LDURSWi:
4237 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4238 case AArch64::LDRXui:
4239 case AArch64::LDURXi:
4240 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4241 }
4242 // These instructions can't be paired based on their opcodes.
4243 return false;
4244}
4245
4246static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4247 int64_t Offset1, unsigned Opcode1, int FI2,
4248 int64_t Offset2, unsigned Opcode2) {
4249 // Accesses through fixed stack object frame indices may access a different
4250 // fixed stack slot. Check that the object offsets + offsets match.
4251 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4252 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4253 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4254 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4255 // Convert to scaled object offsets.
4256 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4257 if (ObjectOffset1 % Scale1 != 0)
4258 return false;
4259 ObjectOffset1 /= Scale1;
4260 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4261 if (ObjectOffset2 % Scale2 != 0)
4262 return false;
4263 ObjectOffset2 /= Scale2;
4264 ObjectOffset1 += Offset1;
4265 ObjectOffset2 += Offset2;
4266 return ObjectOffset1 + 1 == ObjectOffset2;
4267 }
4268
4269 return FI1 == FI2;
4270}
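// Example: two fixed stack objects at byte offsets 0 and 8, each accessed by
// an LDRXui with immediate 0, scale to object offsets 0 and 1; 0 + 1 == 1, so
// the accesses are adjacent and may be clustered into a single LDP.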
4271
4272/// Detect opportunities for ldp/stp formation.
4273///
4274/// Only called for LdSt for which getMemOperandWithOffset returns true.
4275bool AArch64InstrInfo::shouldClusterMemOps(
4276 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4277 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4278 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4279 unsigned NumBytes) const {
4280 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4281 const MachineOperand &BaseOp1 = *BaseOps1.front();
4282 const MachineOperand &BaseOp2 = *BaseOps2.front();
4283 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4284 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4285 if (BaseOp1.getType() != BaseOp2.getType())
4286 return false;
4287
4288 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4289 "Only base registers and frame indices are supported.");
4290
4291 // Check for both base regs and base FI.
4292 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4293 return false;
4294
4295 // Only cluster up to a single pair.
4296 if (ClusterSize > 2)
4297 return false;
4298
4299 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4300 return false;
4301
4302 // Can we pair these instructions based on their opcodes?
4303 unsigned FirstOpc = FirstLdSt.getOpcode();
4304 unsigned SecondOpc = SecondLdSt.getOpcode();
4305 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4306 return false;
4307
4308 // Can't merge volatiles or load/stores that have a hint to avoid pair
4309 // formation, for example.
4310 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4311 !isCandidateToMergeOrPair(SecondLdSt))
4312 return false;
4313
4314 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4315 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4316 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4317 return false;
4318
4319 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4320 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4321 return false;
4322
4323 // Pairwise instructions have a 7-bit signed offset field.
4324 if (Offset1 > 63 || Offset1 < -64)
4325 return false;
4326
4327 // The caller should already have ordered First/SecondLdSt by offset.
4328 // Note: except for non-equal frame index bases
4329 if (BaseOp1.isFI()) {
4330 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4331 "Caller should have ordered offsets.");
4332
4333 const MachineFrameInfo &MFI =
4334 FirstLdSt.getParent()->getParent()->getFrameInfo();
4335 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4336 BaseOp2.getIndex(), Offset2, SecondOpc);
4337 }
4338
4339 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4340
4341 return Offset1 + 1 == Offset2;
4342}
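// Summary of the pairing constraints checked above: same base register (or
// compatible frame indices), at most one pair per cluster, compatible opcodes,
// both offsets expressible in elements, and the first element offset within
// the signed 7-bit LDP/STP range with the second exactly one element higher.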
4343
4344static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4345 unsigned Reg, unsigned SubIdx,
4346 unsigned State,
4347 const TargetRegisterInfo *TRI) {
4348 if (!SubIdx)
4349 return MIB.addReg(Reg, State);
4350
4351 if (Register::isPhysicalRegister(Reg))
4352 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4353 return MIB.addReg(Reg, State, SubIdx);
4354}
4355
4356static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4357 unsigned NumRegs) {
4358 // We really want the positive remainder mod 32 here, that happens to be
4359 // easily obtainable with a mask.
4360 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4361}
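// Example: copying the tuple D0_D1_D2 into D1_D2_D3 gives (1 - 0) & 0x1f == 1,
// which is < 3, so a front-to-back copy would overwrite D1 and D2 before they
// are read; copyPhysRegTuple below therefore copies the sub-registers in
// reverse order in that case.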
4362
4363void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4364 MachineBasicBlock::iterator I,
4365 const DebugLoc &DL, MCRegister DestReg,
4366 MCRegister SrcReg, bool KillSrc,
4367 unsigned Opcode,
4368 ArrayRef<unsigned> Indices) const {
4369 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4370 const TargetRegisterInfo *TRI = &getRegisterInfo();
4371 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4372 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4373 unsigned NumRegs = Indices.size();
4374
4375 int SubReg = 0, End = NumRegs, Incr = 1;
4376 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4377 SubReg = NumRegs - 1;
4378 End = -1;
4379 Incr = -1;
4380 }
4381
4382 for (; SubReg != End; SubReg += Incr) {
4383 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4384 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4385 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4386 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4387 }
4388}
4389
4390void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4391 MachineBasicBlock::iterator I,
4392 DebugLoc DL, unsigned DestReg,
4393 unsigned SrcReg, bool KillSrc,
4394 unsigned Opcode, unsigned ZeroReg,
4395 llvm::ArrayRef<unsigned> Indices) const {
4396 const TargetRegisterInfo *TRI = &getRegisterInfo();
4397 unsigned NumRegs = Indices.size();
4398
4399#ifndef NDEBUG
4400 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4401 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4402 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4403 "GPR reg sequences should not be able to overlap");
4404#endif
4405
4406 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4407 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4408 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4409 MIB.addReg(ZeroReg);
4410 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4411 MIB.addImm(0);
4412 }
4413}
4414
4415void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4416 MachineBasicBlock::iterator I,
4417 const DebugLoc &DL, MCRegister DestReg,
4418 MCRegister SrcReg, bool KillSrc) const {
4419 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4420 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4421 const TargetRegisterInfo *TRI = &getRegisterInfo();
4422
4423 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4424 // If either operand is WSP, expand to ADD #0.
4425 if (Subtarget.hasZeroCycleRegMove()) {
4426 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4427 MCRegister DestRegX = TRI->getMatchingSuperReg(
4428 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4429 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4430 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4431 // This instruction is reading and writing X registers. This may upset
4432 // the register scavenger and machine verifier, so we need to indicate
4433 // that we are reading an undefined value from SrcRegX, but a proper
4434 // value from SrcReg.
4435 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4436 .addReg(SrcRegX, RegState::Undef)
4437 .addImm(0)
4438 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
4439 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4440 } else {
4441 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4442 .addReg(SrcReg, getKillRegState(KillSrc))
4443 .addImm(0)
4444 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4445 }
4446 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4447 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4448 .addImm(0)
4449 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4450 } else {
4451 if (Subtarget.hasZeroCycleRegMove()) {
4452 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4453 MCRegister DestRegX = TRI->getMatchingSuperReg(
4454 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4455 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4456 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4457 // This instruction is reading and writing X registers. This may upset
4458 // the register scavenger and machine verifier, so we need to indicate
4459 // that we are reading an undefined value from SrcRegX, but a proper
4460 // value from SrcReg.
4461 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4462 .addReg(AArch64::XZR)
4463 .addReg(SrcRegX, RegState::Undef)
4464 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4465 } else {
4466 // Otherwise, expand to ORR WZR.
4467 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4468 .addReg(AArch64::WZR)
4469 .addReg(SrcReg, getKillRegState(KillSrc));
4470 }
4471 }
4472 return;
4473 }
4474
4475 // Copy a Predicate register by ORRing with itself.
4476 if (AArch64::PPRRegClass.contains(DestReg) &&
4477 AArch64::PPRRegClass.contains(SrcReg)) {
4478 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4479 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4480 .addReg(SrcReg) // Pg
4481 .addReg(SrcReg)
4482 .addReg(SrcReg, getKillRegState(KillSrc));
4483 return;
4484 }
4485
4486 // Copy a predicate-as-counter register by ORRing with itself as if it
4487 // were a regular predicate (mask) register.
4488 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4489 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4490 if (DestIsPNR || SrcIsPNR) {
4491 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4492 "Unexpected predicate-as-counter register.");
4493 auto ToPPR = [](MCRegister R) -> MCRegister {
4494 return (R - AArch64::PN0) + AArch64::P0;
4495 };
4496 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4497 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4498
4499 if (PPRSrcReg != PPRDestReg) {
4500 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4501 .addReg(PPRSrcReg) // Pg
4502 .addReg(PPRSrcReg)
4503 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4504 if (DestIsPNR)
4505 NewMI.addDef(DestReg, RegState::Implicit);
4506 }
4507 return;
4508 }
4509
4510 // Copy a Z register by ORRing with itself.
4511 if (AArch64::ZPRRegClass.contains(DestReg) &&
4512 AArch64::ZPRRegClass.contains(SrcReg)) {
4513 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4514 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4515 .addReg(SrcReg)
4516 .addReg(SrcReg, getKillRegState(KillSrc));
4517 return;
4518 }
4519
4520 // Copy a Z register pair by copying the individual sub-registers.
4521 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4522 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4523 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4524 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4525 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4526 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4527 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4528 Indices);
4529 return;
4530 }
4531
4532 // Copy a Z register triple by copying the individual sub-registers.
4533 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4534 AArch64::ZPR3RegClass.contains(SrcReg)) {
4535 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4536 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4537 AArch64::zsub2};
4538 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4539 Indices);
4540 return;
4541 }
4542
4543 // Copy a Z register quad by copying the individual sub-registers.
4544 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4545 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4546 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4547 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4548 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4549 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4550 AArch64::zsub2, AArch64::zsub3};
4551 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4552 Indices);
4553 return;
4554 }
4555
4556 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4557 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4558 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4559 // If either operand is SP, expand to ADD #0.
4560 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4561 .addReg(SrcReg, getKillRegState(KillSrc))
4562 .addImm(0)
4563 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4564 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4565 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4566 .addImm(0)
4567 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4568 } else {
4569 // Otherwise, expand to ORR XZR.
4570 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4571 .addReg(AArch64::XZR)
4572 .addReg(SrcReg, getKillRegState(KillSrc));
4573 }
4574 return;
4575 }
4576
4577 // Copy a DDDD register quad by copying the individual sub-registers.
4578 if (AArch64::DDDDRegClass.contains(DestReg) &&
4579 AArch64::DDDDRegClass.contains(SrcReg)) {
4580 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4581 AArch64::dsub2, AArch64::dsub3};
4582 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4583 Indices);
4584 return;
4585 }
4586
4587 // Copy a DDD register triple by copying the individual sub-registers.
4588 if (AArch64::DDDRegClass.contains(DestReg) &&
4589 AArch64::DDDRegClass.contains(SrcReg)) {
4590 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4591 AArch64::dsub2};
4592 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4593 Indices);
4594 return;
4595 }
4596
4597 // Copy a DD register pair by copying the individual sub-registers.
4598 if (AArch64::DDRegClass.contains(DestReg) &&
4599 AArch64::DDRegClass.contains(SrcReg)) {
4600 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4601 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4602 Indices);
4603 return;
4604 }
4605
4606 // Copy a QQQQ register quad by copying the individual sub-registers.
4607 if (AArch64::QQQQRegClass.contains(DestReg) &&
4608 AArch64::QQQQRegClass.contains(SrcReg)) {
4609 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4610 AArch64::qsub2, AArch64::qsub3};
4611 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4612 Indices);
4613 return;
4614 }
4615
4616 // Copy a QQQ register triple by copying the individual sub-registers.
4617 if (AArch64::QQQRegClass.contains(DestReg) &&
4618 AArch64::QQQRegClass.contains(SrcReg)) {
4619 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4620 AArch64::qsub2};
4621 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4622 Indices);
4623 return;
4624 }
4625
4626 // Copy a QQ register pair by copying the individual sub-registers.
4627 if (AArch64::QQRegClass.contains(DestReg) &&
4628 AArch64::QQRegClass.contains(SrcReg)) {
4629 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4630 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4631 Indices);
4632 return;
4633 }
4634
4635 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4636 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4637 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4638 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4639 AArch64::XZR, Indices);
4640 return;
4641 }
4642
4643 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4644 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4645 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4646 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4647 AArch64::WZR, Indices);
4648 return;
4649 }
4650
4651 if (AArch64::FPR128RegClass.contains(DestReg) &&
4652 AArch64::FPR128RegClass.contains(SrcReg)) {
4653 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4654 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4655 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4656 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4657 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4658 else if (Subtarget.hasNEON())
4659 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4660 .addReg(SrcReg)
4661 .addReg(SrcReg, getKillRegState(KillSrc));
4662 else {
4663 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4664 .addReg(AArch64::SP, RegState::Define)
4665 .addReg(SrcReg, getKillRegState(KillSrc))
4666 .addReg(AArch64::SP)
4667 .addImm(-16);
4668 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4669 .addReg(AArch64::SP, RegState::Define)
4670 .addReg(DestReg, RegState::Define)
4671 .addReg(AArch64::SP)
4672 .addImm(16);
4673 }
4674 return;
4675 }
4676
4677 if (AArch64::FPR64RegClass.contains(DestReg) &&
4678 AArch64::FPR64RegClass.contains(SrcReg)) {
4679 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4680 .addReg(SrcReg, getKillRegState(KillSrc));
4681 return;
4682 }
4683
4684 if (AArch64::FPR32RegClass.contains(DestReg) &&
4685 AArch64::FPR32RegClass.contains(SrcReg)) {
4686 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4687 .addReg(SrcReg, getKillRegState(KillSrc));
4688 return;
4689 }
4690
4691 if (AArch64::FPR16RegClass.contains(DestReg) &&
4692 AArch64::FPR16RegClass.contains(SrcReg)) {
4693 DestReg =
4694 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4695 SrcReg =
4696 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4697 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4698 .addReg(SrcReg, getKillRegState(KillSrc));
4699 return;
4700 }
4701
4702 if (AArch64::FPR8RegClass.contains(DestReg) &&
4703 AArch64::FPR8RegClass.contains(SrcReg)) {
4704 DestReg =
4705 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4706 SrcReg =
4707 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4708 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4709 .addReg(SrcReg, getKillRegState(KillSrc));
4710 return;
4711 }
4712
4713 // Copies between GPR64 and FPR64.
4714 if (AArch64::FPR64RegClass.contains(DestReg) &&
4715 AArch64::GPR64RegClass.contains(SrcReg)) {
4716 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4717 .addReg(SrcReg, getKillRegState(KillSrc));
4718 return;
4719 }
4720 if (AArch64::GPR64RegClass.contains(DestReg) &&
4721 AArch64::FPR64RegClass.contains(SrcReg)) {
4722 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4723 .addReg(SrcReg, getKillRegState(KillSrc));
4724 return;
4725 }
4726 // Copies between GPR32 and FPR32.
4727 if (AArch64::FPR32RegClass.contains(DestReg) &&
4728 AArch64::GPR32RegClass.contains(SrcReg)) {
4729 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4730 .addReg(SrcReg, getKillRegState(KillSrc));
4731 return;
4732 }
4733 if (AArch64::GPR32RegClass.contains(DestReg) &&
4734 AArch64::FPR32RegClass.contains(SrcReg)) {
4735 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4736 .addReg(SrcReg, getKillRegState(KillSrc));
4737 return;
4738 }
4739
4740 if (DestReg == AArch64::NZCV) {
4741 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4742 BuildMI(MBB, I, DL, get(AArch64::MSR))
4743 .addImm(AArch64SysReg::NZCV)
4744 .addReg(SrcReg, getKillRegState(KillSrc))
4745 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4746 return;
4747 }
4748
4749 if (SrcReg == AArch64::NZCV) {
4750 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4751 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4752 .addImm(AArch64SysReg::NZCV)
4753 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4754 return;
4755 }
4756
4757#ifndef NDEBUG
4758 const TargetRegisterInfo &TRI = getRegisterInfo();
4759 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4760 << TRI.getRegAsmName(SrcReg) << "\n";
4761#endif
4762 llvm_unreachable("unimplemented reg-to-reg copy");
4763}
4764
4765static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
4766 MachineBasicBlock &MBB,
4767 MachineBasicBlock::iterator InsertBefore,
4768 const MCInstrDesc &MCID,
4769 Register SrcReg, bool IsKill,
4770 unsigned SubIdx0, unsigned SubIdx1, int FI,
4771 MachineMemOperand *MMO) {
4772 Register SrcReg0 = SrcReg;
4773 Register SrcReg1 = SrcReg;
4774 if (SrcReg.isPhysical()) {
4775 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4776 SubIdx0 = 0;
4777 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4778 SubIdx1 = 0;
4779 }
4780 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4781 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4782 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4783 .addFrameIndex(FI)
4784 .addImm(0)
4785 .addMemOperand(MMO);
4786}
4787
4788void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4789 MachineBasicBlock::iterator MBBI,
4790 Register SrcReg, bool isKill, int FI,
4791 const TargetRegisterClass *RC,
4792 const TargetRegisterInfo *TRI,
4793 Register VReg) const {
4794 MachineFunction &MF = *MBB.getParent();
4795 MachineFrameInfo &MFI = MF.getFrameInfo();
4796
4797 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4798 MachineMemOperand *MMO =
4799 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
4800 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4801 unsigned Opc = 0;
4802 bool Offset = true;
4803 MCRegister PNRReg = MCRegister::NoRegister;
4804 unsigned StackID = TargetStackID::Default;
4805 switch (TRI->getSpillSize(*RC)) {
4806 case 1:
4807 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4808 Opc = AArch64::STRBui;
4809 break;
4810 case 2: {
4811 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4812 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4813 Opc = AArch64::STRHui;
4814 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4815 assert(Subtarget.hasSVEorSME() &&
4816 "Unexpected register store without SVE store instructions");
4817 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4818 "Unexpected register store without SVE2p1 or SME2");
4819 Opc = AArch64::STR_PXI;
4820 StackID = TargetStackID::ScalableVector;
4821 }
4822 break;
4823 }
4824 case 4:
4825 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4826 Opc = AArch64::STRWui;
4827 if (SrcReg.isVirtual())
4828 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4829 else
4830 assert(SrcReg != AArch64::WSP);
4831 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4832 Opc = AArch64::STRSui;
4833 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4834 Opc = AArch64::STR_PPXI;
4835 StackID = TargetStackID::ScalableVector;
4836 }
4837 break;
4838 case 8:
4839 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4840 Opc = AArch64::STRXui;
4841 if (SrcReg.isVirtual())
4842 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4843 else
4844 assert(SrcReg != AArch64::SP);
4845 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4846 Opc = AArch64::STRDui;
4847 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4848 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4849 get(AArch64::STPWi), SrcReg, isKill,
4850 AArch64::sube32, AArch64::subo32, FI, MMO);
4851 return;
4852 }
4853 break;
4854 case 16:
4855 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4856 Opc = AArch64::STRQui;
4857 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4858 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4859 Opc = AArch64::ST1Twov1d;
4860 Offset = false;
4861 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4862 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4863 get(AArch64::STPXi), SrcReg, isKill,
4864 AArch64::sube64, AArch64::subo64, FI, MMO);
4865 return;
4866 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4867 assert(Subtarget.hasSVEorSME() &&
4868 "Unexpected register store without SVE store instructions");
4869 Opc = AArch64::STR_ZXI;
4870 StackID = TargetStackID::ScalableVector;
4871 }
4872 break;
4873 case 24:
4874 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4875 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4876 Opc = AArch64::ST1Threev1d;
4877 Offset = false;
4878 }
4879 break;
4880 case 32:
4881 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4882 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4883 Opc = AArch64::ST1Fourv1d;
4884 Offset = false;
4885 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4886 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4887 Opc = AArch64::ST1Twov2d;
4888 Offset = false;
4889 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4890 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4891 assert(Subtarget.hasSVEorSME() &&
4892 "Unexpected register store without SVE store instructions");
4893 Opc = AArch64::STR_ZZXI;
4894 StackID = TargetStackID::ScalableVector;
4895 }
4896 break;
4897 case 48:
4898 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4899 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4900 Opc = AArch64::ST1Threev2d;
4901 Offset = false;
4902 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasSVEorSME() &&
4904 "Unexpected register store without SVE store instructions");
4905 Opc = AArch64::STR_ZZZXI;
4906 StackID = TargetStackID::ScalableVector;
4907 }
4908 break;
4909 case 64:
4910 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4911 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4912 Opc = AArch64::ST1Fourv2d;
4913 Offset = false;
4914 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4915 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4916 assert(Subtarget.hasSVEorSME() &&
4917 "Unexpected register store without SVE store instructions");
4918 Opc = AArch64::STR_ZZZZXI;
4919 StackID = TargetStackID::ScalableVector;
4920 }
4921 break;
4922 }
4923 assert(Opc && "Unknown register class");
4924 MFI.setStackID(FI, StackID);
4925
4926 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4927 .addReg(SrcReg, getKillRegState(isKill))
4928 .addFrameIndex(FI);
4929
4930 if (Offset)
4931 MI.addImm(0);
4932 if (PNRReg.isValid())
4933 MI.addDef(PNRReg, RegState::Implicit);
4934 MI.addMemOperand(MMO);
4935}
4936
4937static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
4938 MachineBasicBlock &MBB,
4939 MachineBasicBlock::iterator InsertBefore,
4940 const MCInstrDesc &MCID,
4941 Register DestReg, unsigned SubIdx0,
4942 unsigned SubIdx1, int FI,
4943 MachineMemOperand *MMO) {
4944 Register DestReg0 = DestReg;
4945 Register DestReg1 = DestReg;
4946 bool IsUndef = true;
4947 if (DestReg.isPhysical()) {
4948 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4949 SubIdx0 = 0;
4950 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4951 SubIdx1 = 0;
4952 IsUndef = false;
4953 }
4954 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4955 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4956 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4957 .addFrameIndex(FI)
4958 .addImm(0)
4959 .addMemOperand(MMO);
4960}
4961
4962void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4963 MachineBasicBlock::iterator MBBI,
4964 Register DestReg, int FI,
4965 const TargetRegisterClass *RC,
4966 const TargetRegisterInfo *TRI,
4967 Register VReg) const {
4968 MachineFunction &MF = *MBB.getParent();
4969 MachineFrameInfo &MFI = MF.getFrameInfo();
4970 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4971 MachineMemOperand *MMO =
4972 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
4973 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4974
4975 unsigned Opc = 0;
4976 bool Offset = true;
4977 unsigned StackID = TargetStackID::Default;
4978 Register PNRReg = MCRegister::NoRegister;
4979 switch (TRI->getSpillSize(*RC)) {
4980 case 1:
4981 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4982 Opc = AArch64::LDRBui;
4983 break;
4984 case 2: {
4985 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4986 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4987 Opc = AArch64::LDRHui;
4988 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4989 assert(Subtarget.hasSVEorSME() &&
4990 "Unexpected register load without SVE load instructions");
4991 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4992 "Unexpected register load without SVE2p1 or SME2");
4993 if (IsPNR)
4994 PNRReg = DestReg;
4995 Opc = AArch64::LDR_PXI;
4996 StackID = TargetStackID::ScalableVector;
4997 }
4998 break;
4999 }
5000 case 4:
5001 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5002 Opc = AArch64::LDRWui;
5003 if (DestReg.isVirtual())
5004 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5005 else
5006 assert(DestReg != AArch64::WSP);
5007 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5008 Opc = AArch64::LDRSui;
5009 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5010 Opc = AArch64::LDR_PPXI;
5011 StackID = TargetStackID::ScalableVector;
5012 }
5013 break;
5014 case 8:
5015 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5016 Opc = AArch64::LDRXui;
5017 if (DestReg.isVirtual())
5018 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5019 else
5020 assert(DestReg != AArch64::SP);
5021 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5022 Opc = AArch64::LDRDui;
5023 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5024 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5025 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5026 AArch64::subo32, FI, MMO);
5027 return;
5028 }
5029 break;
5030 case 16:
5031 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5032 Opc = AArch64::LDRQui;
5033 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5034 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5035 Opc = AArch64::LD1Twov1d;
5036 Offset = false;
5037 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5038 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
5039 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5040 AArch64::subo64, FI, MMO);
5041 return;
5042 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5043 assert(Subtarget.hasSVEorSME() &&
5044 "Unexpected register load without SVE load instructions");
5045 Opc = AArch64::LDR_ZXI;
5046 StackID = TargetStackID::ScalableVector;
5047 }
5048 break;
5049 case 24:
5050 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5051 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5052 Opc = AArch64::LD1Threev1d;
5053 Offset = false;
5054 }
5055 break;
5056 case 32:
5057 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5058 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5059 Opc = AArch64::LD1Fourv1d;
5060 Offset = false;
5061 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5062 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5063 Opc = AArch64::LD1Twov2d;
5064 Offset = false;
5065 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5066 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5067 assert(Subtarget.hasSVEorSME() &&
5068 "Unexpected register load without SVE load instructions");
5069 Opc = AArch64::LDR_ZZXI;
5070 StackID = TargetStackID::ScalableVector;
5071 }
5072 break;
5073 case 48:
5074 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5075 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5076 Opc = AArch64::LD1Threev2d;
5077 Offset = false;
5078 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5079 assert(Subtarget.hasSVEorSME() &&
5080 "Unexpected register load without SVE load instructions");
5081 Opc = AArch64::LDR_ZZZXI;
5082 StackID = TargetStackID::ScalableVector;
5083 }
5084 break;
5085 case 64:
5086 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5087 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5088 Opc = AArch64::LD1Fourv2d;
5089 Offset = false;
5090 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5091 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5092 assert(Subtarget.hasSVEorSME() &&
5093 "Unexpected register load without SVE load instructions");
5094 Opc = AArch64::LDR_ZZZZXI;
5095 StackID = TargetStackID::ScalableVector;
5096 }
5097 break;
5098 }
5099
5100 assert(Opc && "Unknown register class");
5101 MFI.setStackID(FI, StackID);
5102
5103 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5104 .addReg(DestReg, getDefRegState(true))
5105 .addFrameIndex(FI);
5106 if (Offset)
5107 MI.addImm(0);
5108 if (PNRReg.isValid() && !PNRReg.isVirtual())
5109 MI.addDef(PNRReg, RegState::Implicit);
5110 MI.addMemOperand(MMO);
5111
5112 if (PNRReg.isValid() && PNRReg.isVirtual())
5113 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5114 .addReg(DestReg);
5115}
5116
5117bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5118 const MachineInstr &UseMI,
5119 const TargetRegisterInfo *TRI) {
5120 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5121 UseMI.getIterator()),
5122 [TRI](const MachineInstr &I) {
5123 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5124 I.readsRegister(AArch64::NZCV, TRI);
5125 });
5126}
5127
5128void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5129 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5130 // The smallest scalable elements supported by scaled SVE addressing
5131 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5132 // byte offset must always be a multiple of 2.
5133 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5134
5135 // VGSized offsets are divided by '2', because the VG register is the
5136 // number of 64bit granules as opposed to 128bit vector chunks,
5137 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5138 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5139 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5140 ByteSized = Offset.getFixed();
5141 VGSized = Offset.getScalable() / 2;
5142}
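// Example: a StackOffset of 16 fixed bytes plus 32 scalable bytes decomposes
// into ByteSized = 16 and VGSized = 16; since VG = 2 * vscale, 16 * VG equals
// the original 32 * vscale scalable bytes.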
5143
5144/// Returns the offset in parts to which this frame offset can be
5145/// decomposed for the purpose of describing a frame offset.
5146/// For non-scalable offsets this is simply its byte size.
5147void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5148 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5149 int64_t &NumDataVectors) {
5150 // The smallest scalable elements supported by scaled SVE addressing
5151 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5152 // byte offset must always be a multiple of 2.
5153 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5154
5155 NumBytes = Offset.getFixed();
5156 NumDataVectors = 0;
5157 NumPredicateVectors = Offset.getScalable() / 2;
5158 // This method is used to get the offsets to adjust the frame offset.
5159 // If the function requires ADDPL to be used and needs more than two ADDPL
5160 // instructions, part of the offset is folded into NumDataVectors so that it
5161 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5162 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5163 NumPredicateVectors > 62) {
5164 NumDataVectors = NumPredicateVectors / 8;
5165 NumPredicateVectors -= NumDataVectors * 8;
5166 }
5167}
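// Example: a scalable offset of 144 bytes is 72 predicate-vector (PL) units;
// 72 is a multiple of 8, so it is folded into NumDataVectors = 9 (a single
// ADDVL #9) with NumPredicateVectors = 0.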
5168
5169// Convenience function to create a DWARF expression for
5170// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5171static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5172 int NumVGScaledBytes, unsigned VG,
5173 llvm::raw_string_ostream &Comment) {
5174 uint8_t buffer[16];
5175
5176 if (NumBytes) {
5177 Expr.push_back(dwarf::DW_OP_consts);
5178 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5179 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5180 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5181 }
5182
5183 if (NumVGScaledBytes) {
5184 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5185 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5186
5187 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5188 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5189 Expr.push_back(0);
5190
5191 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5192 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5193
5194 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5195 << std::abs(NumVGScaledBytes) << " * VG";
5196 }
5197}
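// The resulting expression is evaluated as:
//   <base> + NumBytes + (NumVGScaledBytes * VG)
// using DW_OP_consts/DW_OP_plus for the fixed part and
// DW_OP_consts/DW_OP_bregx(VG, 0)/DW_OP_mul/DW_OP_plus for the scalable part.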
5198
5199// Creates an MCCFIInstruction:
5200// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5201static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5202 unsigned Reg,
5203 const StackOffset &Offset) {
5204 int64_t NumBytes, NumVGScaledBytes;
5205 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
5206 NumVGScaledBytes);
5207 std::string CommentBuffer;
5208 llvm::raw_string_ostream Comment(CommentBuffer);
5209
5210 if (Reg == AArch64::SP)
5211 Comment << "sp";
5212 else if (Reg == AArch64::FP)
5213 Comment << "fp";
5214 else
5215 Comment << printReg(Reg, &TRI);
5216
5217 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5218 SmallString<64> Expr;
5219 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5220 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5221 Expr.push_back(0);
5222 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5223 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5224
5225 // Wrap this into DW_CFA_def_cfa.
5226 SmallString<64> DefCfaExpr;
5227 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5228 uint8_t buffer[16];
5229 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5230 DefCfaExpr.append(Expr.str());
5231 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5232 Comment.str());
5233}
5234
5235MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
5236 unsigned FrameReg, unsigned Reg,
5237 const StackOffset &Offset,
5238 bool LastAdjustmentWasScalable) {
5239 if (Offset.getScalable())
5240 return createDefCFAExpression(TRI, Reg, Offset);
5241
5242 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5243 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5244
5245 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5246 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5247}
5248
5249MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
5250 unsigned Reg,
5251 const StackOffset &OffsetFromDefCFA) {
5252 int64_t NumBytes, NumVGScaledBytes;
5253 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5254 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5255
5256 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5257
5258 // Non-scalable offsets can use DW_CFA_offset directly.
5259 if (!NumVGScaledBytes)
5260 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5261
5262 std::string CommentBuffer;
5263 llvm::raw_string_ostream Comment(CommentBuffer);
5264 Comment << printReg(Reg, &TRI) << " @ cfa";
5265
5266 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5267 SmallString<64> OffsetExpr;
5268 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5269 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5270
5271 // Wrap this into DW_CFA_expression
5272 SmallString<64> CfaExpr;
5273 CfaExpr.push_back(dwarf::DW_CFA_expression);
5274 uint8_t buffer[16];
5275 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5276 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5277 CfaExpr.append(OffsetExpr.str());
5278
5279 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5280 Comment.str());
5281}
5282
5283// Helper function to emit a frame offset adjustment from a given
5284// pointer (SrcReg), stored into DestReg. This function is explicit
5285// in that it requires the opcode.
5286static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5287 MachineBasicBlock::iterator MBBI,
5288 const DebugLoc &DL, unsigned DestReg,
5289 unsigned SrcReg, int64_t Offset, unsigned Opc,
5290 const TargetInstrInfo *TII,
5291 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5292 bool *HasWinCFI, bool EmitCFAOffset,
5293 StackOffset CFAOffset, unsigned FrameReg) {
5294 int Sign = 1;
5295 unsigned MaxEncoding, ShiftSize;
5296 switch (Opc) {
5297 case AArch64::ADDXri:
5298 case AArch64::ADDSXri:
5299 case AArch64::SUBXri:
5300 case AArch64::SUBSXri:
5301 MaxEncoding = 0xfff;
5302 ShiftSize = 12;
5303 break;
5304 case AArch64::ADDVL_XXI:
5305 case AArch64::ADDPL_XXI:
5306 case AArch64::ADDSVL_XXI:
5307 case AArch64::ADDSPL_XXI:
5308 MaxEncoding = 31;
5309 ShiftSize = 0;
5310 if (Offset < 0) {
5311 MaxEncoding = 32;
5312 Sign = -1;
5313 Offset = -Offset;
5314 }
5315 break;
5316 default:
5317 llvm_unreachable("Unsupported opcode");
5318 }
5319
5320 // `Offset` can be in bytes or in "scalable bytes".
5321 int VScale = 1;
5322 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5323 VScale = 16;
5324 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5325 VScale = 2;
5326
5327 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5328 // scratch register. If DestReg is a virtual register, use it as the
5329 // scratch register; otherwise, create a new virtual register (to be
5330 // replaced by the scavenger at the end of PEI). That case can be optimized
5331 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5332 // register can be loaded with offset%8 and the add/sub can use an extending
5333 // instruction with LSL#3.
5334 // Currently the function handles any offsets but generates a poor sequence
5335 // of code.
5336 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5337
5338 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5339 Register TmpReg = DestReg;
5340 if (TmpReg == AArch64::XZR)
5341 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
5342 &AArch64::GPR64RegClass);
5343 do {
5344 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5345 unsigned LocalShiftSize = 0;
5346 if (ThisVal > MaxEncoding) {
5347 ThisVal = ThisVal >> ShiftSize;
5348 LocalShiftSize = ShiftSize;
5349 }
5350 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5351 "Encoding cannot handle value that big");
5352
5353 Offset -= ThisVal << LocalShiftSize;
5354 if (Offset == 0)
5355 TmpReg = DestReg;
5356 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5357 .addReg(SrcReg)
5358 .addImm(Sign * (int)ThisVal);
5359 if (ShiftSize)
5360 MBI = MBI.addImm(
5361 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
5362 MBI = MBI.setMIFlag(Flag);
5363
5364 auto Change =
5365 VScale == 1
5366 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5367 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5368 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5369 CFAOffset += Change;
5370 else
5371 CFAOffset -= Change;
5372 if (EmitCFAOffset && DestReg == TmpReg) {
5373 MachineFunction &MF = *MBB.getParent();
5374 const TargetSubtargetInfo &STI = MF.getSubtarget();
5375 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5376
5377 unsigned CFIIndex = MF.addFrameInst(
5378 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5379 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5380 .addCFIIndex(CFIIndex)
5381 .setMIFlags(Flag);
5382 }
5383
5384 if (NeedsWinCFI) {
5385 assert(Sign == 1 && "SEH directives should always have a positive sign");
5386 int Imm = (int)(ThisVal << LocalShiftSize);
5387 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5388 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5389 if (HasWinCFI)
5390 *HasWinCFI = true;
5391 if (Imm == 0)
5392 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5393 else
5394 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5395 .addImm(Imm)
5396 .setMIFlag(Flag);
5397 assert(Offset == 0 && "Expected remaining offset to be zero to "
5398 "emit a single SEH directive");
5399 } else if (DestReg == AArch64::SP) {
5400 if (HasWinCFI)
5401 *HasWinCFI = true;
5402 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5403 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5404 .addImm(Imm)
5405 .setMIFlag(Flag);
5406 }
5407 }
5408
5409 SrcReg = TmpReg;
5410 } while (Offset);
5411}
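// Example of the splitting loop above: adjusting by 0x123456 with ADDXri emits
// "add Tmp, Src, #0x123, lsl #12" (consuming 0x123000) followed by
// "add Dest, Tmp, #0x456", since each ADD/SUB immediate is limited to 12 bits
// optionally shifted left by 12.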
5412
5413void llvm::emitFrameOffset(MachineBasicBlock &MBB,
5414 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
5415 unsigned DestReg, unsigned SrcReg,
5416 StackOffset Offset, const TargetInstrInfo *TII,
5417 MachineInstr::MIFlag Flag, bool SetNZCV,
5418 bool NeedsWinCFI, bool *HasWinCFI,
5419 bool EmitCFAOffset, StackOffset CFAOffset,
5420 unsigned FrameReg) {
5421 // If a function is marked as arm_locally_streaming, then the runtime value of
5422 // vscale in the prologue/epilogue is different from the runtime value of vscale
5423 // in the function's body. To avoid having to consider multiple vscales,
5424 // we can use `addsvl` to allocate any scalable stack-slots, which under
5425 // most circumstances will be only locals, not callee-save slots.
5426 const Function &F = MBB.getParent()->getFunction();
5427 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5428
5429 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5430 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5431 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5432
5433 // First emit non-scalable frame offsets, or a simple 'mov'.
5434 if (Bytes || (!Offset && SrcReg != DestReg)) {
5435 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5436 "SP increment/decrement not 8-byte aligned");
5437 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5438 if (Bytes < 0) {
5439 Bytes = -Bytes;
5440 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5441 }
5442 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5443 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5444 FrameReg);
5445 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5446 ? StackOffset::getFixed(-Bytes)
5447 : StackOffset::getFixed(Bytes);
5448 SrcReg = DestReg;
5449 FrameReg = DestReg;
5450 }
5451
5452 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5453 "SetNZCV not supported with SVE vectors");
5454 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5455 "WinCFI not supported with SVE vectors");
5456
5457 if (NumDataVectors) {
5458 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5459 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5460 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5461 CFAOffset, FrameReg);
5462 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5463 SrcReg = DestReg;
5464 }
5465
5466 if (NumPredicateVectors) {
5467 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5468 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5469 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5470 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5471 CFAOffset, FrameReg);
5472 }
5473}
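// Illustrative sketch (not part of the LLVM source): the unit arithmetic
// behind splitting a mixed offset into a fixed byte adjustment plus scalable
// ADDVL (16 scalable bytes per unit) and ADDPL (2 scalable bytes per unit)
// increments, as used by the CFA bookkeeping above. The values are
// hypothetical, and this shows only one valid split; the in-tree
// decomposition helper may prefer a different one (for example expressing
// the whole scalable part in predicate units).
//
// #include <cassert>
// #include <cstdint>
// #include <cstdio>
//
// int main() {
//   int64_t FixedBytes = 32;    // hypothetical non-scalable part
//   int64_t ScalableBytes = 34; // hypothetical scalable part, multiple of 2
//   assert(ScalableBytes % 2 == 0 && "predicates are 2 scalable bytes");
//   int64_t NumDataVectors = ScalableBytes / 16;            // ADDVL units
//   int64_t NumPredicateVectors = (ScalableBytes % 16) / 2; // ADDPL units
//   std::printf("fixed: %lld bytes, ADDVL #%lld, ADDPL #%lld\n",
//               (long long)FixedBytes, (long long)NumDataVectors,
//               (long long)NumPredicateVectors);
// }
//
// 34 scalable bytes come out as two ADDVL units plus one ADDPL unit
// (2 * 16 + 1 * 2 = 34), matching the 16-byte data-vector scaling visible in
// the CFAOffset update above.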
5474
5475MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5476    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5477    MachineBasicBlock::iterator InsertPt, int FrameIndex,
5478 LiveIntervals *LIS, VirtRegMap *VRM) const {
5479 // This is a bit of a hack. Consider this instruction:
5480 //
5481 // %0 = COPY %sp; GPR64all:%0
5482 //
5483 // We explicitly chose GPR64all for the virtual register so such a copy might
5484 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5485 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5486 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5487 //
5488 // To prevent that, we are going to constrain the %0 register class here.
5489 if (MI.isFullCopy()) {
5490 Register DstReg = MI.getOperand(0).getReg();
5491 Register SrcReg = MI.getOperand(1).getReg();
5492 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5493 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5494 return nullptr;
5495 }
5496 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5497 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5498 return nullptr;
5499 }
5500    // Nothing can be folded with a copy from/to NZCV.
5501 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5502 return nullptr;
5503 }
5504
5505 // Handle the case where a copy is being spilled or filled but the source
5506 // and destination register class don't match. For example:
5507 //
5508 // %0 = COPY %xzr; GPR64common:%0
5509 //
5510 // In this case we can still safely fold away the COPY and generate the
5511 // following spill code:
5512 //
5513 // STRXui %xzr, %stack.0
5514 //
5515 // This also eliminates spilled cross register class COPYs (e.g. between x and
5516 // d regs) of the same size. For example:
5517 //
5518 // %0 = COPY %1; GPR64:%0, FPR64:%1
5519 //
5520 // will be filled as
5521 //
5522 // LDRDui %0, fi<#0>
5523 //
5524 // instead of
5525 //
5526 // LDRXui %Temp, fi<#0>
5527 // %0 = FMOV %Temp
5528 //
5529 if (MI.isCopy() && Ops.size() == 1 &&
5530 // Make sure we're only folding the explicit COPY defs/uses.
5531 (Ops[0] == 0 || Ops[0] == 1)) {
5532 bool IsSpill = Ops[0] == 0;
5533 bool IsFill = !IsSpill;
5534    const TargetRegisterInfo &TRI = getRegisterInfo();
5535    const MachineRegisterInfo &MRI = MF.getRegInfo();
5536 MachineBasicBlock &MBB = *MI.getParent();
5537 const MachineOperand &DstMO = MI.getOperand(0);
5538 const MachineOperand &SrcMO = MI.getOperand(1);
5539 Register DstReg = DstMO.getReg();
5540 Register SrcReg = SrcMO.getReg();
5541 // This is slightly expensive to compute for physical regs since
5542 // getMinimalPhysRegClass is slow.
5543 auto getRegClass = [&](unsigned Reg) {
5544 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5545 : TRI.getMinimalPhysRegClass(Reg);
5546 };
5547
5548 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5549 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5550 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5551 "Mismatched register size in non subreg COPY");
5552 if (IsSpill)
5553 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5554 getRegClass(SrcReg), &TRI, Register());
5555 else
5556 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5557 getRegClass(DstReg), &TRI, Register());
5558 return &*--InsertPt;
5559 }
5560
5561 // Handle cases like spilling def of:
5562 //
5563 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5564 //
5565 // where the physical register source can be widened and stored to the full
5566 // virtual reg destination stack slot, in this case producing:
5567 //
5568 // STRXui %xzr, %stack.0
5569 //
5570 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5571 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5572 assert(SrcMO.getSubReg() == 0 &&
5573 "Unexpected subreg on physical register");
5574 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5575 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5576 Register());
5577 return &*--InsertPt;
5578 }
5579
5580 // Handle cases like filling use of:
5581 //
5582 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5583 //
5584    // where we can load from the full virtual reg source stack slot into the subreg
5585 // destination, in this case producing:
5586 //
5587 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5588 //
5589 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5590 const TargetRegisterClass *FillRC;
5591 switch (DstMO.getSubReg()) {
5592 default:
5593 FillRC = nullptr;
5594 break;
5595 case AArch64::sub_32:
5596 FillRC = &AArch64::GPR32RegClass;
5597 break;
5598 case AArch64::ssub:
5599 FillRC = &AArch64::FPR32RegClass;
5600 break;
5601 case AArch64::dsub:
5602 FillRC = &AArch64::FPR64RegClass;
5603 break;
5604 }
5605
5606 if (FillRC) {
5607 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5608 TRI.getRegSizeInBits(*FillRC) &&
5609 "Mismatched regclass size on folded subreg COPY");
5610 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5611 Register());
5612 MachineInstr &LoadMI = *--InsertPt;
5613 MachineOperand &LoadDst = LoadMI.getOperand(0);
5614 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5615 LoadDst.setSubReg(DstMO.getSubReg());
5616 LoadDst.setIsUndef();
5617 return &LoadMI;
5618 }
5619 }
5620 }
5621
5622 // Cannot fold.
5623 return nullptr;
5624}
5625
5626int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5627                                    StackOffset &SOffset,
5628 bool *OutUseUnscaledOp,
5629 unsigned *OutUnscaledOp,
5630 int64_t *EmittableOffset) {
5631 // Set output values in case of early exit.
5632 if (EmittableOffset)
5633 *EmittableOffset = 0;
5634 if (OutUseUnscaledOp)
5635 *OutUseUnscaledOp = false;
5636 if (OutUnscaledOp)
5637 *OutUnscaledOp = 0;
5638
5639 // Exit early for structured vector spills/fills as they can't take an
5640 // immediate offset.
5641 switch (MI.getOpcode()) {
5642 default:
5643 break;
5644 case AArch64::LD1Rv1d:
5645 case AArch64::LD1Rv2s:
5646 case AArch64::LD1Rv2d:
5647 case AArch64::LD1Rv4h:
5648 case AArch64::LD1Rv4s:
5649 case AArch64::LD1Rv8b:
5650 case AArch64::LD1Rv8h:
5651 case AArch64::LD1Rv16b:
5652 case AArch64::LD1Twov2d:
5653 case AArch64::LD1Threev2d:
5654 case AArch64::LD1Fourv2d:
5655 case AArch64::LD1Twov1d:
5656 case AArch64::LD1Threev1d:
5657 case AArch64::LD1Fourv1d:
5658 case AArch64::ST1Twov2d:
5659 case AArch64::ST1Threev2d:
5660 case AArch64::ST1Fourv2d:
5661 case AArch64::ST1Twov1d:
5662 case AArch64::ST1Threev1d:
5663 case AArch64::ST1Fourv1d:
5664 case AArch64::ST1i8:
5665 case AArch64::ST1i16:
5666 case AArch64::ST1i32:
5667 case AArch64::ST1i64:
5668 case AArch64::IRG:
5669 case AArch64::IRGstack:
5670 case AArch64::STGloop:
5671 case AArch64::STZGloop:
5672    return AArch64FrameOffsetCannotUpdate;
5673  }
5674
5675 // Get the min/max offset and the scale.
5676 TypeSize ScaleValue(0U, false), Width(0U, false);
5677 int64_t MinOff, MaxOff;
5678 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5679 MaxOff))
5680 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5681
5682 // Construct the complete offset.
5683 bool IsMulVL = ScaleValue.isScalable();
5684 unsigned Scale = ScaleValue.getKnownMinValue();
5685 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5686
5687 const MachineOperand &ImmOpnd =
5688 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5689 Offset += ImmOpnd.getImm() * Scale;
5690
5691 // If the offset doesn't match the scale, we rewrite the instruction to
5692 // use the unscaled instruction instead. Likewise, if we have a negative
5693 // offset and there is an unscaled op to use.
5694 std::optional<unsigned> UnscaledOp =
5695      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5696  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5697 if (useUnscaledOp &&
5698 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5699 MaxOff))
5700 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5701
5702 Scale = ScaleValue.getKnownMinValue();
5703 assert(IsMulVL == ScaleValue.isScalable() &&
5704 "Unscaled opcode has different value for scalable");
5705
5706 int64_t Remainder = Offset % Scale;
5707 assert(!(Remainder && useUnscaledOp) &&
5708 "Cannot have remainder when using unscaled op");
5709
5710 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5711 int64_t NewOffset = Offset / Scale;
5712 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5713 Offset = Remainder;
5714 else {
5715 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5716 Offset = Offset - NewOffset * Scale;
5717 }
5718
5719 if (EmittableOffset)
5720 *EmittableOffset = NewOffset;
5721 if (OutUseUnscaledOp)
5722 *OutUseUnscaledOp = useUnscaledOp;
5723 if (OutUnscaledOp && UnscaledOp)
5724 *OutUnscaledOp = *UnscaledOp;
5725
5726 if (IsMulVL)
5727 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5728 else
5729 SOffset = StackOffset::get(Offset, SOffset.getScalable());
5730  return AArch64FrameOffsetCanUpdate |
5731         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5732}
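// Illustrative sketch (not part of the LLVM source): the clamping arithmetic
// used above to split a byte offset into an immediate the load/store can
// encode (scaled units within [MinOff, MaxOff]) plus a leftover that must be
// materialized elsewhere. The concrete numbers assume an LDRXui-style form
// (scale 8, unsigned immediate range [0, 4095]).
//
// #include <cstdint>
// #include <cstdio>
//
// int main() {
//   int64_t Offset = 32776; // hypothetical byte offset
//   const int64_t Scale = 8, MinOff = 0, MaxOff = 4095;
//   int64_t NewOffset = Offset / Scale; // candidate scaled immediate
//   int64_t Remainder = Offset % Scale;
//   if (NewOffset >= MinOff && NewOffset <= MaxOff) {
//     Offset = Remainder; // fully encodable up to the sub-scale remainder
//   } else {
//     NewOffset = NewOffset < 0 ? MinOff : MaxOff; // clamp to the range
//     Offset = Offset - NewOffset * Scale;         // leftover bytes
//   }
//   std::printf("encode imm #%lld, leftover %lld bytes\n",
//               (long long)NewOffset, (long long)Offset);
// }
//
// For 32776 bytes this clamps the immediate to #4095 and leaves 16 bytes to be
// folded into the frame register, the same split the code above reports via
// EmittableOffset and the updated SOffset.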
5733
5734bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5735                                    unsigned FrameReg, StackOffset &Offset,
5736 const AArch64InstrInfo *TII) {
5737 unsigned Opcode = MI.getOpcode();
5738 unsigned ImmIdx = FrameRegIdx + 1;
5739
5740 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5741 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5742 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5743 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5744 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5745 MI.eraseFromParent();
5746 Offset = StackOffset();
5747 return true;
5748 }
5749
5750 int64_t NewOffset;
5751 unsigned UnscaledOp;
5752 bool UseUnscaledOp;
5753 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5754 &UnscaledOp, &NewOffset);
5755  if (Status & AArch64FrameOffsetCanUpdate) {
5756    if (Status & AArch64FrameOffsetIsLegal)
5757      // Replace the FrameIndex with FrameReg.
5758 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5759 if (UseUnscaledOp)
5760 MI.setDesc(TII->get(UnscaledOp));
5761
5762 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5763 return !Offset;
5764 }
5765
5766 return false;
5767}
5768
5769void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5770                                  MachineBasicBlock::iterator MI) const {
5771  DebugLoc DL;
5772 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5773}
5774
5775MCInst AArch64InstrInfo::getNop() const {
5776  return MCInstBuilder(AArch64::HINT).addImm(0);
5777}
5778
5779// AArch64 supports MachineCombiner.
5780bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5781
5782// True when Opc sets flags
5783static bool isCombineInstrSettingFlag(unsigned Opc) {
5784 switch (Opc) {
5785 case AArch64::ADDSWrr:
5786 case AArch64::ADDSWri:
5787 case AArch64::ADDSXrr:
5788 case AArch64::ADDSXri:
5789 case AArch64::SUBSWrr:
5790 case AArch64::SUBSXrr:
5791 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5792 case AArch64::SUBSWri:
5793 case AArch64::SUBSXri:
5794 return true;
5795 default:
5796 break;
5797 }
5798 return false;
5799}
5800
5801// 32b Opcodes that can be combined with a MUL
5802static bool isCombineInstrCandidate32(unsigned Opc) {
5803 switch (Opc) {
5804 case AArch64::ADDWrr:
5805 case AArch64::ADDWri:
5806 case AArch64::SUBWrr:
5807 case AArch64::ADDSWrr:
5808 case AArch64::ADDSWri:
5809 case AArch64::SUBSWrr:
5810 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5811 case AArch64::SUBWri:
5812 case AArch64::SUBSWri:
5813 return true;
5814 default:
5815 break;
5816 }
5817 return false;
5818}
5819
5820// 64b Opcodes that can be combined with a MUL
5821static bool isCombineInstrCandidate64(unsigned Opc) {
5822 switch (Opc) {
5823 case AArch64::ADDXrr:
5824 case AArch64::ADDXri:
5825 case AArch64::SUBXrr:
5826 case AArch64::ADDSXrr:
5827 case AArch64::ADDSXri:
5828 case AArch64::SUBSXrr:
5829 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5830 case AArch64::SUBXri:
5831 case AArch64::SUBSXri:
5832 case AArch64::ADDv8i8:
5833 case AArch64::ADDv16i8:
5834 case AArch64::ADDv4i16:
5835 case AArch64::ADDv8i16:
5836 case AArch64::ADDv2i32:
5837 case AArch64::ADDv4i32:
5838 case AArch64::SUBv8i8:
5839 case AArch64::SUBv16i8:
5840 case AArch64::SUBv4i16:
5841 case AArch64::SUBv8i16:
5842 case AArch64::SUBv2i32:
5843 case AArch64::SUBv4i32:
5844 return true;
5845 default:
5846 break;
5847 }
5848 return false;
5849}
5850
5851// FP Opcodes that can be combined with a FMUL.
5852static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5853 switch (Inst.getOpcode()) {
5854 default:
5855 break;
5856 case AArch64::FADDHrr:
5857 case AArch64::FADDSrr:
5858 case AArch64::FADDDrr:
5859 case AArch64::FADDv4f16:
5860 case AArch64::FADDv8f16:
5861 case AArch64::FADDv2f32:
5862 case AArch64::FADDv2f64:
5863 case AArch64::FADDv4f32:
5864 case AArch64::FSUBHrr:
5865 case AArch64::FSUBSrr:
5866 case AArch64::FSUBDrr:
5867 case AArch64::FSUBv4f16:
5868 case AArch64::FSUBv8f16:
5869 case AArch64::FSUBv2f32:
5870 case AArch64::FSUBv2f64:
5871 case AArch64::FSUBv4f32:
5872    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5873    // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5874    // the target options or if FADD/FSUB has the contract fast-math flag.
5875    return Options.UnsafeFPMath ||
5876           Options.AllowFPOpFusion == FPOpFusion::Fast ||
5877           Inst.getFlag(MachineInstr::FmContract);
5878    return true;
5879 }
5880 return false;
5881}
5882
5883// Opcodes that can be combined with a MUL
5884static bool isCombineInstrCandidate(unsigned Opc) {
5885  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
5886}
5887
5888//
5889// Utility routine that checks if \param MO is defined by an
5890// \param CombineOpc instruction in the basic block \param MBB
5891static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5892                       unsigned CombineOpc, unsigned ZeroReg = 0,
5893                       bool CheckZeroReg = false) {
5894  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5895  MachineInstr *MI = nullptr;
5896
5897 if (MO.isReg() && MO.getReg().isVirtual())
5898 MI = MRI.getUniqueVRegDef(MO.getReg());
5899 // And it needs to be in the trace (otherwise, it won't have a depth).
5900 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5901 return false;
5902  // It must only be used by the instruction we combine with.
5903 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5904 return false;
5905
5906 if (CheckZeroReg) {
5907 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5908 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5909           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5910 // The third input reg must be zero.
5911 if (MI->getOperand(3).getReg() != ZeroReg)
5912 return false;
5913 }
5914
5915 if (isCombineInstrSettingFlag(CombineOpc) &&
5916 MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
5917 return false;
5918
5919 return true;
5920}
5921
5922//
5923// Is \param MO defined by an integer multiply and can be combined?
5924static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5925                              unsigned MulOpc, unsigned ZeroReg) {
5926 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5927}
5928
5929//
5930// Is \param MO defined by a floating-point multiply and can be combined?
5931static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5932                               unsigned MulOpc) {
5933 return canCombine(MBB, MO, MulOpc);
5934}
5935
5936// TODO: There are many more machine instruction opcodes to match:
5937// 1. Other data types (integer, vectors)
5938// 2. Other math / logic operations (xor, or)
5939// 3. Other forms of the same operation (intrinsics and other variants)
5940bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5941                                                   bool Invert) const {
5942 if (Invert)
5943 return false;
5944 switch (Inst.getOpcode()) {
5945 // == Floating-point types ==
5946 // -- Floating-point instructions --
5947 case AArch64::FADDHrr:
5948 case AArch64::FADDSrr:
5949 case AArch64::FADDDrr:
5950 case AArch64::FMULHrr:
5951 case AArch64::FMULSrr:
5952 case AArch64::FMULDrr:
5953 case AArch64::FMULX16:
5954 case AArch64::FMULX32:
5955 case AArch64::FMULX64:
5956 // -- Advanced SIMD instructions --
5957 case AArch64::FADDv4f16:
5958 case AArch64::FADDv8f16:
5959 case AArch64::FADDv2f32:
5960 case AArch64::FADDv4f32:
5961 case AArch64::FADDv2f64:
5962 case AArch64::FMULv4f16:
5963 case AArch64::FMULv8f16:
5964 case AArch64::FMULv2f32:
5965 case AArch64::FMULv4f32:
5966 case AArch64::FMULv2f64:
5967 case AArch64::FMULXv4f16:
5968 case AArch64::FMULXv8f16:
5969 case AArch64::FMULXv2f32:
5970 case AArch64::FMULXv4f32:
5971 case AArch64::FMULXv2f64:
5972 // -- SVE instructions --
5973 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5974 // in the SVE instruction set (though there are predicated ones).
5975 case AArch64::FADD_ZZZ_H:
5976 case AArch64::FADD_ZZZ_S:
5977 case AArch64::FADD_ZZZ_D:
5978 case AArch64::FMUL_ZZZ_H:
5979 case AArch64::FMUL_ZZZ_S:
5980 case AArch64::FMUL_ZZZ_D:
5981 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
5982           (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
5983            Inst.getFlag(MachineInstr::MIFlag::FmNsz));
5984
5985 // == Integer types ==
5986 // -- Base instructions --
5987 // Opcodes MULWrr and MULXrr don't exist because
5988 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
5989 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
5990  // The machine-combiner does not support three-source-operand machine
5991  // instructions, so we cannot reassociate MULs.
5992 case AArch64::ADDWrr:
5993 case AArch64::ADDXrr:
5994 case AArch64::ANDWrr:
5995 case AArch64::ANDXrr:
5996 case AArch64::ORRWrr:
5997 case AArch64::ORRXrr:
5998 case AArch64::EORWrr:
5999 case AArch64::EORXrr:
6000 case AArch64::EONWrr:
6001 case AArch64::EONXrr:
6002 // -- Advanced SIMD instructions --
6003 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6004 // in the Advanced SIMD instruction set.
6005 case AArch64::ADDv8i8:
6006 case AArch64::ADDv16i8:
6007 case AArch64::ADDv4i16:
6008 case AArch64::ADDv8i16:
6009 case AArch64::ADDv2i32:
6010 case AArch64::ADDv4i32:
6011 case AArch64::ADDv1i64:
6012 case AArch64::ADDv2i64:
6013 case AArch64::MULv8i8:
6014 case AArch64::MULv16i8:
6015 case AArch64::MULv4i16:
6016 case AArch64::MULv8i16:
6017 case AArch64::MULv2i32:
6018 case AArch64::MULv4i32:
6019 case AArch64::ANDv8i8:
6020 case AArch64::ANDv16i8:
6021 case AArch64::ORRv8i8:
6022 case AArch64::ORRv16i8:
6023 case AArch64::EORv8i8:
6024 case AArch64::EORv16i8:
6025 // -- SVE instructions --
6026 case AArch64::ADD_ZZZ_B:
6027 case AArch64::ADD_ZZZ_H:
6028 case AArch64::ADD_ZZZ_S:
6029 case AArch64::ADD_ZZZ_D:
6030 case AArch64::MUL_ZZZ_B:
6031 case AArch64::MUL_ZZZ_H:
6032 case AArch64::MUL_ZZZ_S:
6033 case AArch64::MUL_ZZZ_D:
6034 case AArch64::AND_ZZZ:
6035 case AArch64::ORR_ZZZ:
6036 case AArch64::EOR_ZZZ:
6037 return true;
6038
6039 default:
6040 return false;
6041 }
6042}
6043
6044/// Find instructions that can be turned into madd.
6045static bool getMaddPatterns(MachineInstr &Root,
6046                            SmallVectorImpl<unsigned> &Patterns) {
6047 unsigned Opc = Root.getOpcode();
6048 MachineBasicBlock &MBB = *Root.getParent();
6049 bool Found = false;
6050
6051 if (!isCombineInstrCandidate(Opc))
6052 return false;
6053 if (isCombineInstrSettingFlag(Opc)) {
6054 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
6055    // When NZCV is live, bail out.
6056 if (Cmp_NZCV == -1)
6057 return false;
6058 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6059    // When the opcode can't change, bail out.
6060 // CHECKME: do we miss any cases for opcode conversion?
6061 if (NewOpc == Opc)
6062 return false;
6063 Opc = NewOpc;
6064 }
6065
6066 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6067 unsigned Pattern) {
6068 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6069 Patterns.push_back(Pattern);
6070 Found = true;
6071 }
6072 };
6073
6074 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6075 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6076 Patterns.push_back(Pattern);
6077 Found = true;
6078 }
6079 };
6080
6081  typedef AArch64MachineCombinerPattern MCP;
6082
6083 switch (Opc) {
6084 default:
6085 break;
6086 case AArch64::ADDWrr:
6087 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6088 "ADDWrr does not have register operands");
6089 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6090 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6091 break;
6092 case AArch64::ADDXrr:
6093 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6094 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6095 break;
6096 case AArch64::SUBWrr:
6097 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6098 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6099 break;
6100 case AArch64::SUBXrr:
6101 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6102 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6103 break;
6104 case AArch64::ADDWri:
6105 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6106 break;
6107 case AArch64::ADDXri:
6108 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6109 break;
6110 case AArch64::SUBWri:
6111 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6112 break;
6113 case AArch64::SUBXri:
6114 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6115 break;
6116 case AArch64::ADDv8i8:
6117 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6118 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6119 break;
6120 case AArch64::ADDv16i8:
6121 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6122 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6123 break;
6124 case AArch64::ADDv4i16:
6125 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6126 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6127 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6128 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6129 break;
6130 case AArch64::ADDv8i16:
6131 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6132 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6133 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6134 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6135 break;
6136 case AArch64::ADDv2i32:
6137 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6138 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6139 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6140 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6141 break;
6142 case AArch64::ADDv4i32:
6143 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6144 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6145 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6146 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6147 break;
6148 case AArch64::SUBv8i8:
6149 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6150 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6151 break;
6152 case AArch64::SUBv16i8:
6153 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6154 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6155 break;
6156 case AArch64::SUBv4i16:
6157 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6158 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6159 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6160 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6161 break;
6162 case AArch64::SUBv8i16:
6163 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6164 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6165 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6166 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6167 break;
6168 case AArch64::SUBv2i32:
6169 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6170 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6171 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6172 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6173 break;
6174 case AArch64::SUBv4i32:
6175 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6176 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6177 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6178 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6179 break;
6180 }
6181 return Found;
6182}
6183/// Floating-Point Support
6184
6185/// Find instructions that can be turned into madd.
6186static bool getFMAPatterns(MachineInstr &Root,
6187                           SmallVectorImpl<unsigned> &Patterns) {
6188
6189 if (!isCombineInstrCandidateFP(Root))
6190 return false;
6191
6192 MachineBasicBlock &MBB = *Root.getParent();
6193 bool Found = false;
6194
6195 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6196 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6197 Patterns.push_back(Pattern);
6198 return true;
6199 }
6200 return false;
6201 };
6202
6203  typedef AArch64MachineCombinerPattern MCP;
6204
6205 switch (Root.getOpcode()) {
6206 default:
6207 assert(false && "Unsupported FP instruction in combiner\n");
6208 break;
6209 case AArch64::FADDHrr:
6210 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6211 "FADDHrr does not have register operands");
6212
6213 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6214 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6215 break;
6216 case AArch64::FADDSrr:
6217 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6218 "FADDSrr does not have register operands");
6219
6220 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6221 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6222
6223 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6224 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6225 break;
6226 case AArch64::FADDDrr:
6227 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6228 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6229
6230 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6231 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6232 break;
6233 case AArch64::FADDv4f16:
6234 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6235 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6236
6237 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6238 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6239 break;
6240 case AArch64::FADDv8f16:
6241 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6242 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6243
6244 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6245 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6246 break;
6247 case AArch64::FADDv2f32:
6248 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6249 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6250
6251 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6252 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6253 break;
6254 case AArch64::FADDv2f64:
6255 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6256 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6257
6258 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6259 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6260 break;
6261 case AArch64::FADDv4f32:
6262 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6263 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6264
6265 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6266 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6267 break;
6268 case AArch64::FSUBHrr:
6269 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6270 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6271 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6272 break;
6273 case AArch64::FSUBSrr:
6274 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6275
6276 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6277 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6278
6279 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6280 break;
6281 case AArch64::FSUBDrr:
6282 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6283
6284 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6285 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6286
6287 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6288 break;
6289 case AArch64::FSUBv4f16:
6290 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6291 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6292
6293 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6294 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6295 break;
6296 case AArch64::FSUBv8f16:
6297 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6298 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6299
6300 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6301 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6302 break;
6303 case AArch64::FSUBv2f32:
6304 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6305 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6306
6307 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6308 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6309 break;
6310 case AArch64::FSUBv2f64:
6311 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6312 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6313
6314 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6315 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6316 break;
6317 case AArch64::FSUBv4f32:
6318 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6319 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6320
6321 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6322 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6323 break;
6324 }
6325 return Found;
6326}
6327
6328static bool getFMULPatterns(MachineInstr &Root,
6329                            SmallVectorImpl<unsigned> &Patterns) {
6330 MachineBasicBlock &MBB = *Root.getParent();
6331 bool Found = false;
6332
6333 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6334    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6335    MachineOperand &MO = Root.getOperand(Operand);
6336 MachineInstr *MI = nullptr;
6337 if (MO.isReg() && MO.getReg().isVirtual())
6338 MI = MRI.getUniqueVRegDef(MO.getReg());
6339 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6340 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6341 MI->getOperand(1).getReg().isVirtual())
6342 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6343 if (MI && MI->getOpcode() == Opcode) {
6344 Patterns.push_back(Pattern);
6345 return true;
6346 }
6347 return false;
6348 };
6349
6350  typedef AArch64MachineCombinerPattern MCP;
6351
6352 switch (Root.getOpcode()) {
6353 default:
6354 return false;
6355 case AArch64::FMULv2f32:
6356 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6357 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6358 break;
6359 case AArch64::FMULv2f64:
6360 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6361 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6362 break;
6363 case AArch64::FMULv4f16:
6364 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6365 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6366 break;
6367 case AArch64::FMULv4f32:
6368 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6369 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6370 break;
6371 case AArch64::FMULv8f16:
6372 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6373 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6374 break;
6375 }
6376
6377 return Found;
6378}
6379
6380static bool getFNEGPatterns(MachineInstr &Root,
6381                            SmallVectorImpl<unsigned> &Patterns) {
6382 unsigned Opc = Root.getOpcode();
6383 MachineBasicBlock &MBB = *Root.getParent();
6384  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6385
6386 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6387 MachineOperand &MO = Root.getOperand(1);
6388 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6389 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6390 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6394 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6395 Patterns.push_back(Pattern);
6396 return true;
6397 }
6398 return false;
6399 };
6400
6401 switch (Opc) {
6402 default:
6403 break;
6404 case AArch64::FNEGDr:
6405 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6406 case AArch64::FNEGSr:
6407 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6408 }
6409
6410 return false;
6411}
6412
6413/// Return true when a code sequence can improve throughput. It
6414/// should be called only for instructions in loops.
6415/// \param Pattern - combiner pattern
6416bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
6417  switch (Pattern) {
6418 default:
6419 break;
6525 return true;
6526 } // end switch (Pattern)
6527 return false;
6528}
6529
6530/// Find other MI combine patterns.
6531static bool getMiscPatterns(MachineInstr &Root,
6532                            SmallVectorImpl<unsigned> &Patterns) {
6533 // A - (B + C) ==> (A - B) - C or (A - C) - B
6534 unsigned Opc = Root.getOpcode();
6535 MachineBasicBlock &MBB = *Root.getParent();
6536
6537 switch (Opc) {
6538 case AArch64::SUBWrr:
6539 case AArch64::SUBSWrr:
6540 case AArch64::SUBXrr:
6541 case AArch64::SUBSXrr:
6542 // Found candidate root.
6543 break;
6544 default:
6545 return false;
6546 }
6547
6548 if (isCombineInstrSettingFlag(Opc) &&
6549 Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
6550 return false;
6551
6552 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6553 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6554 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6555 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6556    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
6557    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
6558    return true;
6559 }
6560
6561 return false;
6562}
6563
6566 switch (Pattern) {
6570 default:
6572 }
6573}
6574
6575/// Return true when there is potentially a faster code sequence for an
6576/// instruction chain ending in \p Root. All potential patterns are listed in
6577/// the \p Pattern vector. Pattern should be sorted in priority order since the
6578/// pattern evaluator stops checking as soon as it finds a faster sequence.
6579
6580bool AArch64InstrInfo::getMachineCombinerPatterns(
6581    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6582 bool DoRegPressureReduce) const {
6583 // Integer patterns
6584 if (getMaddPatterns(Root, Patterns))
6585 return true;
6586 // Floating point patterns
6587 if (getFMULPatterns(Root, Patterns))
6588 return true;
6589 if (getFMAPatterns(Root, Patterns))
6590 return true;
6591 if (getFNEGPatterns(Root, Patterns))
6592 return true;
6593
6594 // Other patterns
6595 if (getMiscPatterns(Root, Patterns))
6596 return true;
6597
6598 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6599 DoRegPressureReduce);
6600}
6601
6602enum class FMAInstKind { Default, Indexed, Accumulator };
6603/// genFusedMultiply - Generate fused multiply instructions.
6604/// This function supports both integer and floating point instructions.
6605/// A typical example:
6606/// F|MUL I=A,B,0
6607/// F|ADD R,I,C
6608/// ==> F|MADD R,A,B,C
6609/// \param MF Containing MachineFunction
6610/// \param MRI Register information
6611/// \param TII Target information
6612/// \param Root is the F|ADD instruction
6613/// \param [out] InsInstrs is a vector of machine instructions and will
6614/// contain the generated madd instruction
6615/// \param IdxMulOpd is index of operand in Root that is the result of
6616/// the F|MUL. In the example above IdxMulOpd is 1.
6617/// \param MaddOpc the opcode of the f|madd instruction
6618/// \param RC Register class of operands
6619/// \param kind of fma instruction (addressing mode) to be generated
6620/// \param ReplacedAddend is the result register from the instruction
6621/// replacing the non-combined operand, if any.
6622static MachineInstr *
6624 const TargetInstrInfo *TII, MachineInstr &Root,
6625 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6626 unsigned MaddOpc, const TargetRegisterClass *RC,
6627 FMAInstKind kind = FMAInstKind::Default,
6628 const Register *ReplacedAddend = nullptr) {
6629 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6630
6631 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6632 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6633 Register ResultReg = Root.getOperand(0).getReg();
6634 Register SrcReg0 = MUL->getOperand(1).getReg();
6635 bool Src0IsKill = MUL->getOperand(1).isKill();
6636 Register SrcReg1 = MUL->getOperand(2).getReg();
6637 bool Src1IsKill = MUL->getOperand(2).isKill();
6638
6639 Register SrcReg2;
6640 bool Src2IsKill;
6641 if (ReplacedAddend) {
6642    // If we just generated a new addend, we must be its only use.
6643 SrcReg2 = *ReplacedAddend;
6644 Src2IsKill = true;
6645 } else {
6646 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6647 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6648 }
6649
6650 if (ResultReg.isVirtual())
6651 MRI.constrainRegClass(ResultReg, RC);
6652 if (SrcReg0.isVirtual())
6653 MRI.constrainRegClass(SrcReg0, RC);
6654 if (SrcReg1.isVirtual())
6655 MRI.constrainRegClass(SrcReg1, RC);
6656 if (SrcReg2.isVirtual())
6657 MRI.constrainRegClass(SrcReg2, RC);
6658
6659  MachineInstrBuilder MIB;
6660  if (kind == FMAInstKind::Default)
6661 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6662 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6663 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6664 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6665 else if (kind == FMAInstKind::Indexed)
6666 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6667 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6668 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6669 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6670 .addImm(MUL->getOperand(3).getImm());
6671 else if (kind == FMAInstKind::Accumulator)
6672 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6673 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6674 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6675 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6676 else
6677 assert(false && "Invalid FMA instruction kind \n");
6678  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
6679 InsInstrs.push_back(MIB);
6680 return MUL;
6681}
6682
6683static MachineInstr *
6684genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
6685               const TargetInstrInfo *TII, MachineInstr &Root,
6686               SmallVectorImpl<MachineInstr *> &InsInstrs) {
6687  MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6688
6689 unsigned Opc = 0;
6690 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6691 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6692 Opc = AArch64::FNMADDSrrr;
6693 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6694 Opc = AArch64::FNMADDDrrr;
6695 else
6696 return nullptr;
6697
6698 Register ResultReg = Root.getOperand(0).getReg();
6699 Register SrcReg0 = MAD->getOperand(1).getReg();
6700 Register SrcReg1 = MAD->getOperand(2).getReg();
6701 Register SrcReg2 = MAD->getOperand(3).getReg();
6702 bool Src0IsKill = MAD->getOperand(1).isKill();
6703 bool Src1IsKill = MAD->getOperand(2).isKill();
6704 bool Src2IsKill = MAD->getOperand(3).isKill();
6705 if (ResultReg.isVirtual())
6706 MRI.constrainRegClass(ResultReg, RC);
6707 if (SrcReg0.isVirtual())
6708 MRI.constrainRegClass(SrcReg0, RC);
6709 if (SrcReg1.isVirtual())
6710 MRI.constrainRegClass(SrcReg1, RC);
6711 if (SrcReg2.isVirtual())
6712 MRI.constrainRegClass(SrcReg2, RC);
6713
6714  MachineInstrBuilder MIB =
6715      BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6716 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6717 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6718 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6719 InsInstrs.push_back(MIB);
6720
6721 return MAD;
6722}
6723
6724/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
6725static MachineInstr *
6728 unsigned IdxDupOp, unsigned MulOpc,
6730 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6731 "Invalid index of FMUL operand");
6732
6733 MachineFunction &MF = *Root.getMF();
6735
6736 MachineInstr *Dup =
6737 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6738
6739 if (Dup->getOpcode() == TargetOpcode::COPY)
6740 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6741
6742 Register DupSrcReg = Dup->getOperand(1).getReg();
6743 MRI.clearKillFlags(DupSrcReg);
6744 MRI.constrainRegClass(DupSrcReg, RC);
6745
6746 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6747
6748 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6749 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6750
6751 Register ResultReg = Root.getOperand(0).getReg();
6752
6754 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6755 .add(MulOp)
6756 .addReg(DupSrcReg)
6757 .addImm(DupSrcLane);
6758
6759 InsInstrs.push_back(MIB);
6760 return &Root;
6761}
6762
6763/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6764/// instructions.
6765///
6766/// \see genFusedMultiply
6770 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6771 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6772 FMAInstKind::Accumulator);
6773}
6774
6775/// genNeg - Helper to generate an intermediate negation of the second operand
6776/// of Root
6777static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6778                       const TargetInstrInfo *TII, MachineInstr &Root,
6779                       SmallVectorImpl<MachineInstr *> &InsInstrs,
6780                       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6781 unsigned MnegOpc, const TargetRegisterClass *RC) {
6782 Register NewVR = MRI.createVirtualRegister(RC);
6784 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6785 .add(Root.getOperand(2));
6786 InsInstrs.push_back(MIB);
6787
6788 assert(InstrIdxForVirtReg.empty());
6789 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6790
6791 return NewVR;
6792}
6793
6794/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6795/// instructions with an additional negation of the accumulator
6799 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6800 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6801 assert(IdxMulOpd == 1);
6802
6803 Register NewVR =
6804 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6805 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6806 FMAInstKind::Accumulator, &NewVR);
6807}
6808
6809/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
6810/// instructions.
6811///
6812/// \see genFusedMultiply
6816 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6817 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6818 FMAInstKind::Indexed);
6819}
6820
6821/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6822/// instructions with an additional negation of the accumulator
6826 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6827 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6828 assert(IdxMulOpd == 1);
6829
6830 Register NewVR =
6831 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6832
6833 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6834 FMAInstKind::Indexed, &NewVR);
6835}
6836
6837/// genMaddR - Generate madd instruction and combine mul and add using
6838/// an extra virtual register
6839/// Example - an ADD intermediate needs to be stored in a register:
6840/// MUL I=A,B,0
6841/// ADD R,I,Imm
6842/// ==> ORR V, ZR, Imm
6843/// ==> MADD R,A,B,V
6844/// \param MF Containing MachineFunction
6845/// \param MRI Register information
6846/// \param TII Target information
6847/// \param Root is the ADD instruction
6848/// \param [out] InsInstrs is a vector of machine instructions and will
6849/// contain the generated madd instruction
6850/// \param IdxMulOpd is index of operand in Root that is the result of
6851/// the MUL. In the example above IdxMulOpd is 1.
6852/// \param MaddOpc the opcode of the madd instruction
6853/// \param VR is a virtual register that holds the value of an ADD operand
6854/// (V in the example above).
6855/// \param RC Register class of operands
6856static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6857                              const TargetInstrInfo *TII, MachineInstr &Root,
6858                              SmallVectorImpl<MachineInstr *> &InsInstrs,
6859                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6860 const TargetRegisterClass *RC) {
6861 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6862
6863 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6864 Register ResultReg = Root.getOperand(0).getReg();
6865 Register SrcReg0 = MUL->getOperand(1).getReg();
6866 bool Src0IsKill = MUL->getOperand(1).isKill();
6867 Register SrcReg1 = MUL->getOperand(2).getReg();
6868 bool Src1IsKill = MUL->getOperand(2).isKill();
6869
6870 if (ResultReg.isVirtual())
6871 MRI.constrainRegClass(ResultReg, RC);
6872 if (SrcReg0.isVirtual())
6873 MRI.constrainRegClass(SrcReg0, RC);
6874 if (SrcReg1.isVirtual())
6875 MRI.constrainRegClass(SrcReg1, RC);
6877 MRI.constrainRegClass(VR, RC);
6878
6880 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6881 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6882 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6883 .addReg(VR);
6884 // Insert the MADD
6885 InsInstrs.push_back(MIB);
6886 return MUL;
6887}
6888
6889/// Do the following transformation
6890/// A - (B + C) ==> (A - B) - C
6891/// A - (B + C) ==> (A - C) - B
6892static void
6893genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6894                 const TargetInstrInfo *TII, MachineInstr &Root,
6895                 SmallVectorImpl<MachineInstr *> &InsInstrs,
6896                 SmallVectorImpl<MachineInstr *> &DelInstrs,
6897                 unsigned IdxOpd1,
6898 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6899 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6900 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6901 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6902
6903 Register ResultReg = Root.getOperand(0).getReg();
6904 Register RegA = Root.getOperand(1).getReg();
6905 bool RegAIsKill = Root.getOperand(1).isKill();
6906 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6907 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6908 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6909 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6910 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6911
6912 unsigned Opcode = Root.getOpcode();
6913 if (Opcode == AArch64::SUBSWrr)
6914 Opcode = AArch64::SUBWrr;
6915 else if (Opcode == AArch64::SUBSXrr)
6916 Opcode = AArch64::SUBXrr;
6917 else
6918 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6919 "Unexpected instruction opcode.");
6920
6921 MachineInstrBuilder MIB1 =
6922 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6923 .addReg(RegA, getKillRegState(RegAIsKill))
6924 .addReg(RegB, getKillRegState(RegBIsKill));
6925 MachineInstrBuilder MIB2 =
6926 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6927 .addReg(NewVR, getKillRegState(true))
6928 .addReg(RegC, getKillRegState(RegCIsKill));
6929
6930 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6931 InsInstrs.push_back(MIB1);
6932 InsInstrs.push_back(MIB2);
6933 DelInstrs.push_back(AddMI);
6934}
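// Illustrative sketch (not part of the LLVM source): the algebraic identity
// the rewrite above relies on, checked on plain integers with hypothetical
// values. IdxOpd1 == 1 subtracts B first, IdxOpd1 == 2 subtracts C first.
//
// #include <cassert>
//
// int main() {
//   int A = 100, B = 7, C = 5;
//   assert(A - (B + C) == (A - B) - C); // IdxOpd1 == 1
//   assert(A - (B + C) == (A - C) - B); // IdxOpd1 == 2
// }
//
// The transform is only applied when the flag-setting form's NZCV def is dead
// (see getMiscPatterns), since the two-subtraction sequence does not preserve
// the flags of the original SUBS.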
6935
6936/// When getMachineCombinerPatterns() finds potential patterns,
6937/// this function generates the instructions that could replace the
6938/// original code sequence
6939void AArch64InstrInfo::genAlternativeCodeSequence(
6940    MachineInstr &Root, unsigned Pattern,
6941    SmallVectorImpl<MachineInstr *> &InsInstrs,
6942    SmallVectorImpl<MachineInstr *> &DelInstrs,
6943    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6944 MachineBasicBlock &MBB = *Root.getParent();
6945  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6946  MachineFunction &MF = *MBB.getParent();
6947  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6948
6949 MachineInstr *MUL = nullptr;
6950 const TargetRegisterClass *RC;
6951 unsigned Opc;
6952 switch (Pattern) {
6953 default:
6954 // Reassociate instructions.
6955    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
6956                                                DelInstrs, InstrIdxForVirtReg);
6957 return;
6959 // A - (B + C)
6960 // ==> (A - B) - C
6961 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
6962 InstrIdxForVirtReg);
6963 break;
6965 // A - (B + C)
6966 // ==> (A - C) - B
6967 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
6968 InstrIdxForVirtReg);
6969 break;
6972 // MUL I=A,B,0
6973 // ADD R,I,C
6974 // ==> MADD R,A,B,C
6975 // --- Create(MADD);
6977 Opc = AArch64::MADDWrrr;
6978 RC = &AArch64::GPR32RegClass;
6979 } else {
6980 Opc = AArch64::MADDXrrr;
6981 RC = &AArch64::GPR64RegClass;
6982 }
6983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
6984 break;
6987 // MUL I=A,B,0
6988 // ADD R,C,I
6989 // ==> MADD R,A,B,C
6990 // --- Create(MADD);
6992 Opc = AArch64::MADDWrrr;
6993 RC = &AArch64::GPR32RegClass;
6994 } else {
6995 Opc = AArch64::MADDXrrr;
6996 RC = &AArch64::GPR64RegClass;
6997 }
6998 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
6999 break;
7002 // MUL I=A,B,0
7003 // ADD R,I,Imm
7004 // ==> MOV V, Imm
7005 // ==> MADD R,A,B,V
7006 // --- Create(MADD);
7007 const TargetRegisterClass *OrrRC;
7008 unsigned BitSize, OrrOpc, ZeroReg;
7010 OrrOpc = AArch64::ORRWri;
7011 OrrRC = &AArch64::GPR32spRegClass;
7012 BitSize = 32;
7013 ZeroReg = AArch64::WZR;
7014 Opc = AArch64::MADDWrrr;
7015 RC = &AArch64::GPR32RegClass;
7016 } else {
7017 OrrOpc = AArch64::ORRXri;
7018 OrrRC = &AArch64::GPR64spRegClass;
7019 BitSize = 64;
7020 ZeroReg = AArch64::XZR;
7021 Opc = AArch64::MADDXrrr;
7022 RC = &AArch64::GPR64RegClass;
7023 }
7024 Register NewVR = MRI.createVirtualRegister(OrrRC);
7025 uint64_t Imm = Root.getOperand(2).getImm();
7026
7027 if (Root.getOperand(3).isImm()) {
7028 unsigned Val = Root.getOperand(3).getImm();
7029 Imm = Imm << Val;
7030 }
7031 uint64_t UImm = SignExtend64(Imm, BitSize);
7032 // The immediate can be composed via a single instruction.
7034 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7035 if (Insn.size() != 1)
7036 return;
7037 auto MovI = Insn.begin();
7039 // MOV is an alias for one of three instructions: movz, movn, and orr.
7040 if (MovI->Opcode == OrrOpc)
7041 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7042 .addReg(ZeroReg)
7043 .addImm(MovI->Op2);
7044 else {
7045 if (BitSize == 32)
7046 assert((MovI->Opcode == AArch64::MOVNWi ||
7047 MovI->Opcode == AArch64::MOVZWi) &&
7048 "Expected opcode");
7049 else
7050 assert((MovI->Opcode == AArch64::MOVNXi ||
7051 MovI->Opcode == AArch64::MOVZXi) &&
7052 "Expected opcode");
7053 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7054 .addImm(MovI->Op1)
7055 .addImm(MovI->Op2);
7056 }
7057 InsInstrs.push_back(MIB1);
7058 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7059 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7060 break;
7061 }
7064 // MUL I=A,B,0
7065 // SUB R,I, C
7066 // ==> SUB V, 0, C
7067 // ==> MADD R,A,B,V // = -C + A*B
7068 // --- Create(MADD);
7069 const TargetRegisterClass *SubRC;
7070 unsigned SubOpc, ZeroReg;
7072 SubOpc = AArch64::SUBWrr;
7073 SubRC = &AArch64::GPR32spRegClass;
7074 ZeroReg = AArch64::WZR;
7075 Opc = AArch64::MADDWrrr;
7076 RC = &AArch64::GPR32RegClass;
7077 } else {
7078 SubOpc = AArch64::SUBXrr;
7079 SubRC = &AArch64::GPR64spRegClass;
7080 ZeroReg = AArch64::XZR;
7081 Opc = AArch64::MADDXrrr;
7082 RC = &AArch64::GPR64RegClass;
7083 }
7084 Register NewVR = MRI.createVirtualRegister(SubRC);
7085 // SUB NewVR, 0, C
7086 MachineInstrBuilder MIB1 =
7087 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7088 .addReg(ZeroReg)
7089 .add(Root.getOperand(2));
7090 InsInstrs.push_back(MIB1);
7091 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7092 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7093 break;
7094 }
7097 // MUL I=A,B,0
7098 // SUB R,C,I
7099 // ==> MSUB R,A,B,C (computes C - A*B)
7100 // --- Create(MSUB);
7102 Opc = AArch64::MSUBWrrr;
7103 RC = &AArch64::GPR32RegClass;
7104 } else {
7105 Opc = AArch64::MSUBXrrr;
7106 RC = &AArch64::GPR64RegClass;
7107 }
7108 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7109 break;
7112 // MUL I=A,B,0
7113 // SUB R,I, Imm
7114 // ==> MOV V, -Imm
7115 // ==> MADD R,A,B,V // = -Imm + A*B
7116 // --- Create(MADD);
7117 const TargetRegisterClass *OrrRC;
7118 unsigned BitSize, OrrOpc, ZeroReg;
7120 OrrOpc = AArch64::ORRWri;
7121 OrrRC = &AArch64::GPR32spRegClass;
7122 BitSize = 32;
7123 ZeroReg = AArch64::WZR;
7124 Opc = AArch64::MADDWrrr;
7125 RC = &AArch64::GPR32RegClass;
7126 } else {
7127 OrrOpc = AArch64::ORRXri;
7128 OrrRC = &AArch64::GPR64spRegClass;
7129 BitSize = 64;
7130 ZeroReg = AArch64::XZR;
7131 Opc = AArch64::MADDXrrr;
7132 RC = &AArch64::GPR64RegClass;
7133 }
7134 Register NewVR = MRI.createVirtualRegister(OrrRC);
7135 uint64_t Imm = Root.getOperand(2).getImm();
7136 if (Root.getOperand(3).isImm()) {
7137 unsigned Val = Root.getOperand(3).getImm();
7138 Imm = Imm << Val;
7139 }
7140 uint64_t UImm = SignExtend64(-Imm, BitSize);
7141 // The immediate can be composed via a single instruction.
7143 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7144 if (Insn.size() != 1)
7145 return;
7146 auto MovI = Insn.begin();
7148 // MOV is an alias for one of three instructions: movz, movn, and orr.
7149 if (MovI->Opcode == OrrOpc)
7150 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7151 .addReg(ZeroReg)
7152 .addImm(MovI->Op2);
7153 else {
7154 if (BitSize == 32)
7155 assert((MovI->Opcode == AArch64::MOVNWi ||
7156 MovI->Opcode == AArch64::MOVZWi) &&
7157 "Expected opcode");
7158 else
7159 assert((MovI->Opcode == AArch64::MOVNXi ||
7160 MovI->Opcode == AArch64::MOVZXi) &&
7161 "Expected opcode");
7162 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7163 .addImm(MovI->Op1)
7164 .addImm(MovI->Op2);
7165 }
7166 InsInstrs.push_back(MIB1);
7167 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7168 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7169 break;
7170 }
7171
7173 Opc = AArch64::MLAv8i8;
7174 RC = &AArch64::FPR64RegClass;
7175 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7176 break;
7178 Opc = AArch64::MLAv8i8;
7179 RC = &AArch64::FPR64RegClass;
7180 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7181 break;
7183 Opc = AArch64::MLAv16i8;
7184 RC = &AArch64::FPR128RegClass;
7185 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7186 break;
7188 Opc = AArch64::MLAv16i8;
7189 RC = &AArch64::FPR128RegClass;
7190 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7191 break;
7193 Opc = AArch64::MLAv4i16;
7194 RC = &AArch64::FPR64RegClass;
7195 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7196 break;
7198 Opc = AArch64::MLAv4i16;
7199 RC = &AArch64::FPR64RegClass;
7200 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7201 break;
7203 Opc = AArch64::MLAv8i16;
7204 RC = &AArch64::FPR128RegClass;
7205 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7206 break;
7208 Opc = AArch64::MLAv8i16;
7209 RC = &AArch64::FPR128RegClass;
7210 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7211 break;
7213 Opc = AArch64::MLAv2i32;
7214 RC = &AArch64::FPR64RegClass;
7215 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7216 break;
7218 Opc = AArch64::MLAv2i32;
7219 RC = &AArch64::FPR64RegClass;
7220 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7221 break;
7223 Opc = AArch64::MLAv4i32;
7224 RC = &AArch64::FPR128RegClass;
7225 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7226 break;
7228 Opc = AArch64::MLAv4i32;
7229 RC = &AArch64::FPR128RegClass;
7230 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7231 break;
7232
7234 Opc = AArch64::MLAv8i8;
7235 RC = &AArch64::FPR64RegClass;
7236 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7237 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7238 RC);
7239 break;
7241 Opc = AArch64::MLSv8i8;
7242 RC = &AArch64::FPR64RegClass;
7243 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7244 break;
7246 Opc = AArch64::MLAv16i8;
7247 RC = &AArch64::FPR128RegClass;
7248 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7249 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7250 RC);
7251 break;
7253 Opc = AArch64::MLSv16i8;
7254 RC = &AArch64::FPR128RegClass;
7255 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7256 break;
7258 Opc = AArch64::MLAv4i16;
7259 RC = &AArch64::FPR64RegClass;
7260 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7261 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7262 RC);
7263 break;
7265 Opc = AArch64::MLSv4i16;
7266 RC = &AArch64::FPR64RegClass;
7267 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7268 break;
7270 Opc = AArch64::MLAv8i16;
7271 RC = &AArch64::FPR128RegClass;
7272 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7273 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7274 RC);
7275 break;
7277 Opc = AArch64::MLSv8i16;
7278 RC = &AArch64::FPR128RegClass;
7279 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7280 break;
7282 Opc = AArch64::MLAv2i32;
7283 RC = &AArch64::FPR64RegClass;
7284 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7285 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7286 RC);
7287 break;
7289 Opc = AArch64::MLSv2i32;
7290 RC = &AArch64::FPR64RegClass;
7291 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7292 break;
7294 Opc = AArch64::MLAv4i32;
7295 RC = &AArch64::FPR128RegClass;
7296 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7297 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7298 RC);
7299 break;
7301 Opc = AArch64::MLSv4i32;
7302 RC = &AArch64::FPR128RegClass;
7303 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7304 break;
7305
7307 Opc = AArch64::MLAv4i16_indexed;
7308 RC = &AArch64::FPR64RegClass;
7309 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7310 break;
7312 Opc = AArch64::MLAv4i16_indexed;
7313 RC = &AArch64::FPR64RegClass;
7314 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7315 break;
7317 Opc = AArch64::MLAv8i16_indexed;
7318 RC = &AArch64::FPR128RegClass;
7319 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7320 break;
7322 Opc = AArch64::MLAv8i16_indexed;
7323 RC = &AArch64::FPR128RegClass;
7324 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7325 break;
7327 Opc = AArch64::MLAv2i32_indexed;
7328 RC = &AArch64::FPR64RegClass;
7329 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7330 break;
7332 Opc = AArch64::MLAv2i32_indexed;
7333 RC = &AArch64::FPR64RegClass;
7334 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7335 break;
7337 Opc = AArch64::MLAv4i32_indexed;
7338 RC = &AArch64::FPR128RegClass;
7339 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7340 break;
7342 Opc = AArch64::MLAv4i32_indexed;
7343 RC = &AArch64::FPR128RegClass;
7344 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7345 break;
7346
7348 Opc = AArch64::MLAv4i16_indexed;
7349 RC = &AArch64::FPR64RegClass;
7350 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7351 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7352 RC);
7353 break;
7355 Opc = AArch64::MLSv4i16_indexed;
7356 RC = &AArch64::FPR64RegClass;
7357 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7358 break;
7360 Opc = AArch64::MLAv8i16_indexed;
7361 RC = &AArch64::FPR128RegClass;
7362 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7363 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7364 RC);
7365 break;
7367 Opc = AArch64::MLSv8i16_indexed;
7368 RC = &AArch64::FPR128RegClass;
7369 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7370 break;
7372 Opc = AArch64::MLAv2i32_indexed;
7373 RC = &AArch64::FPR64RegClass;
7374 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7375 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7376 RC);
7377 break;
7379 Opc = AArch64::MLSv2i32_indexed;
7380 RC = &AArch64::FPR64RegClass;
7381 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7382 break;
7384 Opc = AArch64::MLAv4i32_indexed;
7385 RC = &AArch64::FPR128RegClass;
7386 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7387 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7388 RC);
7389 break;
7391 Opc = AArch64::MLSv4i32_indexed;
7392 RC = &AArch64::FPR128RegClass;
7393 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7394 break;
7395
7396 // Floating Point Support
7398 Opc = AArch64::FMADDHrrr;
7399 RC = &AArch64::FPR16RegClass;
7400 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7401 break;
7403 Opc = AArch64::FMADDSrrr;
7404 RC = &AArch64::FPR32RegClass;
7405 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7406 break;
7408 Opc = AArch64::FMADDDrrr;
7409 RC = &AArch64::FPR64RegClass;
7410 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7411 break;
7412
7414 Opc = AArch64::FMADDHrrr;
7415 RC = &AArch64::FPR16RegClass;
7416 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7417 break;
7419 Opc = AArch64::FMADDSrrr;
7420 RC = &AArch64::FPR32RegClass;
7421 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7422 break;
7424 Opc = AArch64::FMADDDrrr;
7425 RC = &AArch64::FPR64RegClass;
7426 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7427 break;
7428
7430 Opc = AArch64::FMLAv1i32_indexed;
7431 RC = &AArch64::FPR32RegClass;
7432 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7433 FMAInstKind::Indexed);
7434 break;
7436 Opc = AArch64::FMLAv1i32_indexed;
7437 RC = &AArch64::FPR32RegClass;
7438 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7439 FMAInstKind::Indexed);
7440 break;
7441
7443 Opc = AArch64::FMLAv1i64_indexed;
7444 RC = &AArch64::FPR64RegClass;
7445 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7446 FMAInstKind::Indexed);
7447 break;
7449 Opc = AArch64::FMLAv1i64_indexed;
7450 RC = &AArch64::FPR64RegClass;
7451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7452 FMAInstKind::Indexed);
7453 break;
7454
7456 RC = &AArch64::FPR64RegClass;
7457 Opc = AArch64::FMLAv4i16_indexed;
7458 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7459 FMAInstKind::Indexed);
7460 break;
7462 RC = &AArch64::FPR64RegClass;
7463 Opc = AArch64::FMLAv4f16;
7464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7465 FMAInstKind::Accumulator);
7466 break;
7468 RC = &AArch64::FPR64RegClass;
7469 Opc = AArch64::FMLAv4i16_indexed;
7470 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7471 FMAInstKind::Indexed);
7472 break;
7474 RC = &AArch64::FPR64RegClass;
7475 Opc = AArch64::FMLAv4f16;
7476 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7477 FMAInstKind::Accumulator);
7478 break;
7479
7482 RC = &AArch64::FPR64RegClass;
7484 Opc = AArch64::FMLAv2i32_indexed;
7485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7486 FMAInstKind::Indexed);
7487 } else {
7488 Opc = AArch64::FMLAv2f32;
7489 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7490 FMAInstKind::Accumulator);
7491 }
7492 break;
7495 RC = &AArch64::FPR64RegClass;
7497 Opc = AArch64::FMLAv2i32_indexed;
7498 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7499 FMAInstKind::Indexed);
7500 } else {
7501 Opc = AArch64::FMLAv2f32;
7502 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7503 FMAInstKind::Accumulator);
7504 }
7505 break;
7506
7508 RC = &AArch64::FPR128RegClass;
7509 Opc = AArch64::FMLAv8i16_indexed;
7510 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7511 FMAInstKind::Indexed);
7512 break;
7514 RC = &AArch64::FPR128RegClass;
7515 Opc = AArch64::FMLAv8f16;
7516 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7517 FMAInstKind::Accumulator);
7518 break;
7520 RC = &AArch64::FPR128RegClass;
7521 Opc = AArch64::FMLAv8i16_indexed;
7522 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7523 FMAInstKind::Indexed);
7524 break;
7526 RC = &AArch64::FPR128RegClass;
7527 Opc = AArch64::FMLAv8f16;
7528 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7529 FMAInstKind::Accumulator);
7530 break;
7531
7534 RC = &AArch64::FPR128RegClass;
7536 Opc = AArch64::FMLAv2i64_indexed;
7537 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7538 FMAInstKind::Indexed);
7539 } else {
7540 Opc = AArch64::FMLAv2f64;
7541 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7542 FMAInstKind::Accumulator);
7543 }
7544 break;
7547 RC = &AArch64::FPR128RegClass;
7549 Opc = AArch64::FMLAv2i64_indexed;
7550 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7551 FMAInstKind::Indexed);
7552 } else {
7553 Opc = AArch64::FMLAv2f64;
7554 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7555 FMAInstKind::Accumulator);
7556 }
7557 break;
7558
7561 RC = &AArch64::FPR128RegClass;
7563 Opc = AArch64::FMLAv4i32_indexed;
7564 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7565 FMAInstKind::Indexed);
7566 } else {
7567 Opc = AArch64::FMLAv4f32;
7568 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7569 FMAInstKind::Accumulator);
7570 }
7571 break;
7572
7575 RC = &AArch64::FPR128RegClass;
7577 Opc = AArch64::FMLAv4i32_indexed;
7578 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7579 FMAInstKind::Indexed);
7580 } else {
7581 Opc = AArch64::FMLAv4f32;
7582 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7583 FMAInstKind::Accumulator);
7584 }
7585 break;
7586
7588 Opc = AArch64::FNMSUBHrrr;
7589 RC = &AArch64::FPR16RegClass;
7590 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7591 break;
7593 Opc = AArch64::FNMSUBSrrr;
7594 RC = &AArch64::FPR32RegClass;
7595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7596 break;
7598 Opc = AArch64::FNMSUBDrrr;
7599 RC = &AArch64::FPR64RegClass;
7600 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7601 break;
7602
7604 Opc = AArch64::FNMADDHrrr;
7605 RC = &AArch64::FPR16RegClass;
7606 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7607 break;
7609 Opc = AArch64::FNMADDSrrr;
7610 RC = &AArch64::FPR32RegClass;
7611 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7612 break;
7614 Opc = AArch64::FNMADDDrrr;
7615 RC = &AArch64::FPR64RegClass;
7616 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7617 break;
7618
7620 Opc = AArch64::FMSUBHrrr;
7621 RC = &AArch64::FPR16RegClass;
7622 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7623 break;
7625 Opc = AArch64::FMSUBSrrr;
7626 RC = &AArch64::FPR32RegClass;
7627 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7628 break;
7630 Opc = AArch64::FMSUBDrrr;
7631 RC = &AArch64::FPR64RegClass;
7632 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7633 break;
7634
7636 Opc = AArch64::FMLSv1i32_indexed;
7637 RC = &AArch64::FPR32RegClass;
7638 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7639 FMAInstKind::Indexed);
7640 break;
7641
7643 Opc = AArch64::FMLSv1i64_indexed;
7644 RC = &AArch64::FPR64RegClass;
7645 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7646 FMAInstKind::Indexed);
7647 break;
7648
7651 RC = &AArch64::FPR64RegClass;
7652 Register NewVR = MRI.createVirtualRegister(RC);
7653 MachineInstrBuilder MIB1 =
7654 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7655 .add(Root.getOperand(2));
7656 InsInstrs.push_back(MIB1);
7657 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7659 Opc = AArch64::FMLAv4f16;
7660 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7661 FMAInstKind::Accumulator, &NewVR);
7662 } else {
7663 Opc = AArch64::FMLAv4i16_indexed;
7664 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7665 FMAInstKind::Indexed, &NewVR);
7666 }
7667 break;
7668 }
7670 RC = &AArch64::FPR64RegClass;
7671 Opc = AArch64::FMLSv4f16;
7672 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7673 FMAInstKind::Accumulator);
7674 break;
7676 RC = &AArch64::FPR64RegClass;
7677 Opc = AArch64::FMLSv4i16_indexed;
7678 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7679 FMAInstKind::Indexed);
7680 break;
7681
7684 RC = &AArch64::FPR64RegClass;
7686 Opc = AArch64::FMLSv2i32_indexed;
7687 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7688 FMAInstKind::Indexed);
7689 } else {
7690 Opc = AArch64::FMLSv2f32;
7691 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7692 FMAInstKind::Accumulator);
7693 }
7694 break;
7695
7698 RC = &AArch64::FPR128RegClass;
7699 Register NewVR = MRI.createVirtualRegister(RC);
7700 MachineInstrBuilder MIB1 =
7701 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7702 .add(Root.getOperand(2));
7703 InsInstrs.push_back(MIB1);
7704 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7706 Opc = AArch64::FMLAv8f16;
7707 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7708 FMAInstKind::Accumulator, &NewVR);
7709 } else {
7710 Opc = AArch64::FMLAv8i16_indexed;
7711 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7712 FMAInstKind::Indexed, &NewVR);
7713 }
7714 break;
7715 }
7717 RC = &AArch64::FPR128RegClass;
7718 Opc = AArch64::FMLSv8f16;
7719 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7720 FMAInstKind::Accumulator);
7721 break;
7723 RC = &AArch64::FPR128RegClass;
7724 Opc = AArch64::FMLSv8i16_indexed;
7725 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7726 FMAInstKind::Indexed);
7727 break;
7728
7731 RC = &AArch64::FPR128RegClass;
7733 Opc = AArch64::FMLSv2i64_indexed;
7734 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7735 FMAInstKind::Indexed);
7736 } else {
7737 Opc = AArch64::FMLSv2f64;
7738 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7739 FMAInstKind::Accumulator);
7740 }
7741 break;
7742
7745 RC = &AArch64::FPR128RegClass;
7747 Opc = AArch64::FMLSv4i32_indexed;
7748 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7749 FMAInstKind::Indexed);
7750 } else {
7751 Opc = AArch64::FMLSv4f32;
7752 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7753 FMAInstKind::Accumulator);
7754 }
7755 break;
7758 RC = &AArch64::FPR64RegClass;
7759 Register NewVR = MRI.createVirtualRegister(RC);
7760 MachineInstrBuilder MIB1 =
7761 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7762 .add(Root.getOperand(2));
7763 InsInstrs.push_back(MIB1);
7764 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7766 Opc = AArch64::FMLAv2i32_indexed;
7767 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7768 FMAInstKind::Indexed, &NewVR);
7769 } else {
7770 Opc = AArch64::FMLAv2f32;
7771 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7772 FMAInstKind::Accumulator, &NewVR);
7773 }
7774 break;
7775 }
7778 RC = &AArch64::FPR128RegClass;
7779 Register NewVR = MRI.createVirtualRegister(RC);
7780 MachineInstrBuilder MIB1 =
7781 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7782 .add(Root.getOperand(2));
7783 InsInstrs.push_back(MIB1);
7784 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7786 Opc = AArch64::FMLAv4i32_indexed;
7787 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7788 FMAInstKind::Indexed, &NewVR);
7789 } else {
7790 Opc = AArch64::FMLAv4f32;
7791 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7792 FMAInstKind::Accumulator, &NewVR);
7793 }
7794 break;
7795 }
7798 RC = &AArch64::FPR128RegClass;
7799 Register NewVR = MRI.createVirtualRegister(RC);
7800 MachineInstrBuilder MIB1 =
7801 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7802 .add(Root.getOperand(2));
7803 InsInstrs.push_back(MIB1);
7804 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7806 Opc = AArch64::FMLAv2i64_indexed;
7807 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7808 FMAInstKind::Indexed, &NewVR);
7809 } else {
7810 Opc = AArch64::FMLAv2f64;
7811 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7812 FMAInstKind::Accumulator, &NewVR);
7813 }
7814 break;
7815 }
7818 unsigned IdxDupOp =
7820 : 2;
7821 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7822 &AArch64::FPR128RegClass, MRI);
7823 break;
7824 }
7827 unsigned IdxDupOp =
7829 : 2;
7830 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7831 &AArch64::FPR128RegClass, MRI);
7832 break;
7833 }
7836 unsigned IdxDupOp =
7838 : 2;
7839 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7840 &AArch64::FPR128_loRegClass, MRI);
7841 break;
7842 }
7845 unsigned IdxDupOp =
7847 : 2;
7848 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7849 &AArch64::FPR128RegClass, MRI);
7850 break;
7851 }
7854 unsigned IdxDupOp =
7856 : 2;
7857 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7858 &AArch64::FPR128_loRegClass, MRI);
7859 break;
7860 }
7862 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7863 break;
7864 }
7865
7866 } // end switch (Pattern)
7867 // Record MUL and ADD/SUB for deletion
7868 if (MUL)
7869 DelInstrs.push_back(MUL);
7870 DelInstrs.push_back(&Root);
7871
7872 // Set the flags on the inserted instructions to be the merged flags of the
7873 // instructions that we have combined.
7874 uint32_t Flags = Root.getFlags();
7875 if (MUL)
7876 Flags = Root.mergeFlagsWith(*MUL);
7877 for (auto *MI : InsInstrs)
7878 MI->setFlags(Flags);
7879}
7880
7881 /// Replace csinc-branch sequence by simple conditional branch
7882///
7883/// Examples:
7884/// 1. \code
7885/// csinc w9, wzr, wzr, <condition code>
7886/// tbnz w9, #0, 0x44
7887/// \endcode
7888/// to
7889/// \code
7890/// b.<inverted condition code>
7891/// \endcode
7892///
7893/// 2. \code
7894/// csinc w9, wzr, wzr, <condition code>
7895/// tbz w9, #0, 0x44
7896/// \endcode
7897/// to
7898/// \code
7899/// b.<condition code>
7900/// \endcode
7901///
7902/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7903 /// compare's constant operand is a power of 2.
7904///
7905/// Examples:
7906/// \code
7907/// and w8, w8, #0x400
7908/// cbnz w8, L1
7909/// \endcode
7910/// to
7911/// \code
7912/// tbnz w8, #10, L1
7913/// \endcode
7914///
7915/// \param MI Conditional Branch
7916/// \return True when the simple conditional branch is generated
7917///
7919 bool IsNegativeBranch = false;
7920 bool IsTestAndBranch = false;
7921 unsigned TargetBBInMI = 0;
7922 switch (MI.getOpcode()) {
7923 default:
7924 llvm_unreachable("Unknown branch instruction?");
7925 case AArch64::Bcc:
7926 return false;
7927 case AArch64::CBZW:
7928 case AArch64::CBZX:
7929 TargetBBInMI = 1;
7930 break;
7931 case AArch64::CBNZW:
7932 case AArch64::CBNZX:
7933 TargetBBInMI = 1;
7934 IsNegativeBranch = true;
7935 break;
7936 case AArch64::TBZW:
7937 case AArch64::TBZX:
7938 TargetBBInMI = 2;
7939 IsTestAndBranch = true;
7940 break;
7941 case AArch64::TBNZW:
7942 case AArch64::TBNZX:
7943 TargetBBInMI = 2;
7944 IsNegativeBranch = true;
7945 IsTestAndBranch = true;
7946 break;
7947 }
7948 // So we increment a zero register and test for bits other
7949 // than bit 0? Conservatively bail out in case the verifier
7950 // missed this case.
7951 if (IsTestAndBranch && MI.getOperand(1).getImm())
7952 return false;
7953
7954 // Find Definition.
7955 assert(MI.getParent() && "Incomplete machine instruction\n");
7956 MachineBasicBlock *MBB = MI.getParent();
7957 MachineFunction *MF = MBB->getParent();
7959 Register VReg = MI.getOperand(0).getReg();
7960 if (!VReg.isVirtual())
7961 return false;
7962
7963 MachineInstr *DefMI = MRI->getVRegDef(VReg);
7964
7965 // Look through COPY instructions to find definition.
7966 while (DefMI->isCopy()) {
7967 Register CopyVReg = DefMI->getOperand(1).getReg();
7968 if (!MRI->hasOneNonDBGUse(CopyVReg))
7969 return false;
7970 if (!MRI->hasOneDef(CopyVReg))
7971 return false;
7972 DefMI = MRI->getVRegDef(CopyVReg);
7973 }
7974
7975 switch (DefMI->getOpcode()) {
7976 default:
7977 return false;
7978 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
7979 case AArch64::ANDWri:
7980 case AArch64::ANDXri: {
7981 if (IsTestAndBranch)
7982 return false;
7983 if (DefMI->getParent() != MBB)
7984 return false;
7985 if (!MRI->hasOneNonDBGUse(VReg))
7986 return false;
7987
7988 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
7989 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
7990 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
7991 if (!isPowerOf2_64(Mask))
7992 return false;
7993
7994 MachineOperand &MO = DefMI->getOperand(1);
7995 Register NewReg = MO.getReg();
7996 if (!NewReg.isVirtual())
7997 return false;
7998
7999 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8000
8001 MachineBasicBlock &RefToMBB = *MBB;
8002 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8003 DebugLoc DL = MI.getDebugLoc();
8004 unsigned Imm = Log2_64(Mask);
8005 unsigned Opc = (Imm < 32)
8006 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8007 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8008 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8009 .addReg(NewReg)
8010 .addImm(Imm)
8011 .addMBB(TBB);
8012 // Register lives on to the TBZ/TBNZ now.
8013 MO.setIsKill(false);
8014
8015 // For immediates smaller than 32, we need to use the 32-bit
8016 // variant (W) in all cases. Indeed the 64-bit variant cannot
8017 // encode them.
8018 // Therefore, if the input register is 64-bit, we need to take the
8019 // 32-bit sub-part.
8020 if (!Is32Bit && Imm < 32)
8021 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8022 MI.eraseFromParent();
8023 return true;
8024 }
8025 // Look for CSINC
8026 case AArch64::CSINCWr:
8027 case AArch64::CSINCXr: {
8028 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8029 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8030 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8031 DefMI->getOperand(2).getReg() == AArch64::XZR))
8032 return false;
8033
8034 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
8035 return false;
8036
8038 // Convert only when the condition code is not modified between
8039 // the CSINC and the branch. The CC may be used by other
8040 // instructions in between.
8042 return false;
8043 MachineBasicBlock &RefToMBB = *MBB;
8044 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8045 DebugLoc DL = MI.getDebugLoc();
8046 if (IsNegativeBranch)
8048 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8049 MI.eraseFromParent();
8050 return true;
8051 }
8052 }
8053}
8054
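// Split an operand's target flags into its "fragment" part (exactly one of the
// MO_FRAGMENT values, e.g. MO_PAGE or MO_G1) and the remaining bitmask flags
// (e.g. MO_GOT or MO_NC). For example, MO_PAGEOFF | MO_NC decomposes into the
// pair (MO_PAGEOFF, MO_NC).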
8055std::pair<unsigned, unsigned>
8057 const unsigned Mask = AArch64II::MO_FRAGMENT;
8058 return std::make_pair(TF & Mask, TF & ~Mask);
8059}
8060
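// The tables below provide the names used to serialize AArch64 operand target
// flags and machine memory operand flags in MIR, so they round-trip through
// .mir files.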
8063 using namespace AArch64II;
8064
8065 static const std::pair<unsigned, const char *> TargetFlags[] = {
8066 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8067 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8068 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8069 {MO_HI12, "aarch64-hi12"}};
8070 return ArrayRef(TargetFlags);
8071}
8072
8075 using namespace AArch64II;
8076
8077 static const std::pair<unsigned, const char *> TargetFlags[] = {
8078 {MO_COFFSTUB, "aarch64-coffstub"},
8079 {MO_GOT, "aarch64-got"},
8080 {MO_NC, "aarch64-nc"},
8081 {MO_S, "aarch64-s"},
8082 {MO_TLS, "aarch64-tls"},
8083 {MO_DLLIMPORT, "aarch64-dllimport"},
8084 {MO_PREL, "aarch64-prel"},
8085 {MO_TAGGED, "aarch64-tagged"},
8086 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8087 };
8088 return ArrayRef(TargetFlags);
8089}
8090
8093 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8094 {{MOSuppressPair, "aarch64-suppress-pair"},
8095 {MOStridedAccess, "aarch64-strided-access"}};
8096 return ArrayRef(TargetFlags);
8097}
8098
8099/// Constants defining how certain sequences should be outlined.
8100/// This encompasses how an outlined function should be called, and what kind of
8101/// frame should be emitted for that outlined function.
8102///
8103/// \p MachineOutlinerDefault implies that the function should be called with
8104/// a save and restore of LR to the stack.
8105///
8106/// That is,
8107///
8108/// I1 Save LR OUTLINED_FUNCTION:
8109/// I2 --> BL OUTLINED_FUNCTION I1
8110/// I3 Restore LR I2
8111/// I3
8112/// RET
8113///
8114/// * Call construction overhead: 3 (save + BL + restore)
8115/// * Frame construction overhead: 1 (ret)
8116/// * Requires stack fixups? Yes
8117///
8118/// \p MachineOutlinerTailCall implies that the function is being created from
8119/// a sequence of instructions ending in a return.
8120///
8121/// That is,
8122///
8123/// I1 OUTLINED_FUNCTION:
8124/// I2 --> B OUTLINED_FUNCTION I1
8125/// RET I2
8126/// RET
8127///
8128/// * Call construction overhead: 1 (B)
8129/// * Frame construction overhead: 0 (Return included in sequence)
8130/// * Requires stack fixups? No
8131///
8132/// \p MachineOutlinerNoLRSave implies that the function should be called using
8133/// a BL instruction, but doesn't require LR to be saved and restored. This
8134/// happens when LR is known to be dead.
8135///
8136/// That is,
8137///
8138/// I1 OUTLINED_FUNCTION:
8139/// I2 --> BL OUTLINED_FUNCTION I1
8140/// I3 I2
8141/// I3
8142/// RET
8143///
8144/// * Call construction overhead: 1 (BL)
8145/// * Frame construction overhead: 1 (RET)
8146/// * Requires stack fixups? No
8147///
8148/// \p MachineOutlinerThunk implies that the function is being created from
8149/// a sequence of instructions ending in a call. The outlined function is
8150/// called with a BL instruction, and the outlined function tail-calls the
8151/// original call destination.
8152///
8153/// That is,
8154///
8155/// I1 OUTLINED_FUNCTION:
8156/// I2 --> BL OUTLINED_FUNCTION I1
8157/// BL f I2
8158/// B f
8159/// * Call construction overhead: 1 (BL)
8160/// * Frame construction overhead: 0
8161/// * Requires stack fixups? No
8162///
8163/// \p MachineOutlinerRegSave implies that the function should be called with a
8164/// save and restore of LR to an available register. This allows us to avoid
8165/// stack fixups. Note that this outlining variant is compatible with the
8166/// NoLRSave case.
8167///
8168/// That is,
8169///
8170/// I1 Save LR OUTLINED_FUNCTION:
8171/// I2 --> BL OUTLINED_FUNCTION I1
8172/// I3 Restore LR I2
8173/// I3
8174/// RET
8175///
8176/// * Call construction overhead: 3 (save + BL + restore)
8177/// * Frame construction overhead: 1 (ret)
8178/// * Requires stack fixups? No
8180 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8181 MachineOutlinerTailCall, /// Only emit a branch.
8182 MachineOutlinerNoLRSave, /// Emit a call and return.
8183 MachineOutlinerThunk, /// Emit a call and tail-call.
8184 MachineOutlinerRegSave /// Same as default, but save to a register.
8186
8190 UnsafeRegsDead = 0x8
8192
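// Find a scratch GPR64 to save LR to, so the outlined call does not have to
// touch the stack: the register must not be reserved, must not be LR, X16 or
// X17, and must be free both inside and across the candidate sequence.
// Returns an invalid Register() if no such register exists.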
8194AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8195 MachineFunction *MF = C.getMF();
8197 const AArch64RegisterInfo *ARI =
8198 static_cast<const AArch64RegisterInfo *>(&TRI);
8199 // Check if there is an available register across the sequence that we can
8200 // use.
8201 for (unsigned Reg : AArch64::GPR64RegClass) {
8202 if (!ARI->isReservedReg(*MF, Reg) &&
8203 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8204 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8205 Reg != AArch64::X17 && // Ditto for X17.
8206 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8207 C.isAvailableInsideSeq(Reg, TRI))
8208 return Reg;
8209 }
8210 return Register();
8211}
8212
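// The next three predicates check whether two outlining candidates agree on
// their return-address-signing scope, their signing key, and their v8.3a
// support. getOutliningCandidateInfo below requires consensus on all three
// before it will outline the candidates into a single function.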
8213static bool
8215 const outliner::Candidate &b) {
8216 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8217 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8218
8219 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8220 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8221}
8222
8223static bool
8225 const outliner::Candidate &b) {
8226 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8227 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8228
8229 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8230}
8231
8233 const outliner::Candidate &b) {
8234 const AArch64Subtarget &SubtargetA =
8236 const AArch64Subtarget &SubtargetB =
8237 b.getMF()->getSubtarget<AArch64Subtarget>();
8238 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8239}
8240
8241std::optional<outliner::OutlinedFunction>
8243 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8244 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8245
8246 unsigned SequenceSize = 0;
8247 for (auto &MI : FirstCand)
8248 SequenceSize += getInstSizeInBytes(MI);
8249
8250 unsigned NumBytesToCreateFrame = 0;
8251
8252 // We only allow outlining for functions having exactly matching return
8253 // address signing attributes, i.e., all share the same value for the
8254 // attribute "sign-return-address" and all share the same type of key they
8255 // are signed with.
8256 // Additionally we require all functions to simultaneously either support
8257 // v8.3a features or not. Otherwise an outlined function could get signed
8258 // using dedicated v8.3 instructions and a call from a function that doesn't
8259 // support v8.3 instructions would therefore be invalid.
8260 if (std::adjacent_find(
8261 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8262 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8263 // Return true if a and b are non-equal w.r.t. return address
8264 // signing or support of v8.3a features
8265 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8266 outliningCandidatesSigningKeyConsensus(a, b) &&
8267 outliningCandidatesV8_3OpsConsensus(a, b)) {
8268 return false;
8269 }
8270 return true;
8271 }) != RepeatedSequenceLocs.end()) {
8272 return std::nullopt;
8273 }
8274
8275 // Since at this point all candidates agree on their return address signing
8276 // picking just one is fine. If the candidate functions potentially sign their
8277 // return addresses, the outlined function should do the same. Note that in
8278 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
8279 // not certainly true that the outlined function will have to sign its return
8280 // address but this decision is made later, when the decision to outline
8281 // has already been made.
8282 // The same holds for the number of additional instructions we need: On
8283 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8284 // necessary. However, at this point we don't know if the outlined function
8285 // will have a RET instruction so we assume the worst.
8286 const TargetRegisterInfo &TRI = getRegisterInfo();
8287 // Performing a tail call may require extra checks when PAuth is enabled.
8288 // If PAuth is disabled, set it to zero for uniformity.
8289 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8290 if (FirstCand.getMF()
8291 ->getInfo<AArch64FunctionInfo>()
8292 ->shouldSignReturnAddress(true)) {
8293 // One PAC and one AUT instructions
8294 NumBytesToCreateFrame += 8;
8295
8296 // PAuth is enabled - set extra tail call cost, if any.
8297 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8298 NumBytesToCheckLRInTCEpilogue =
8300 // Checking the authenticated LR value may significantly impact
8301 // SequenceSize, so account for it for more precise results.
8302 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8303 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8304
8305 // We have to check if sp modifying instructions would get outlined.
8306 // If so we only allow outlining if sp is unchanged overall, so matching
8307 // sub and add instructions are okay to outline, all other sp modifications
8308 // are not
8309 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8310 int SPValue = 0;
8311 for (auto &MI : C) {
8312 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8313 switch (MI.getOpcode()) {
8314 case AArch64::ADDXri:
8315 case AArch64::ADDWri:
8316 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8317 assert(MI.getOperand(2).isImm() &&
8318 "Expected operand to be immediate");
8319 assert(MI.getOperand(1).isReg() &&
8320 "Expected operand to be a register");
8321 // Check if the add just increments sp. If so, we search for
8322 // matching sub instructions that decrement sp. If not, the
8323 // modification is illegal
8324 if (MI.getOperand(1).getReg() == AArch64::SP)
8325 SPValue += MI.getOperand(2).getImm();
8326 else
8327 return true;
8328 break;
8329 case AArch64::SUBXri:
8330 case AArch64::SUBWri:
8331 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8332 assert(MI.getOperand(2).isImm() &&
8333 "Expected operand to be immediate");
8334 assert(MI.getOperand(1).isReg() &&
8335 "Expected operand to be a register");
8336 // Check if the sub just decrements sp. If so, we search for
8337 // matching add instructions that increment sp. If not, the
8338 // modification is illegal
8339 if (MI.getOperand(1).getReg() == AArch64::SP)
8340 SPValue -= MI.getOperand(2).getImm();
8341 else
8342 return true;
8343 break;
8344 default:
8345 return true;
8346 }
8347 }
8348 }
8349 if (SPValue)
8350 return true;
8351 return false;
8352 };
8353 // Remove candidates with illegal stack modifying instructions
8354 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8355
8356 // If the sequence doesn't have enough candidates left, then we're done.
8357 if (RepeatedSequenceLocs.size() < 2)
8358 return std::nullopt;
8359 }
8360
8361 // Properties about candidate MBBs that hold for all of them.
8362 unsigned FlagsSetInAll = 0xF;
8363
8364 // Compute liveness information for each candidate, and set FlagsSetInAll.
8365 for (outliner::Candidate &C : RepeatedSequenceLocs)
8366 FlagsSetInAll &= C.Flags;
8367
8368 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8369
8370 // Helper lambda which sets call information for every candidate.
8371 auto SetCandidateCallInfo =
8372 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8373 for (outliner::Candidate &C : RepeatedSequenceLocs)
8374 C.setCallInfo(CallID, NumBytesForCall);
8375 };
8376
8377 unsigned FrameID = MachineOutlinerDefault;
8378 NumBytesToCreateFrame += 4;
8379
8380 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8381 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8382 });
8383
8384 // We check to see if CFI Instructions are present, and if they are
8385 // we find the number of CFI Instructions in the candidates.
8386 unsigned CFICount = 0;
8387 for (auto &I : RepeatedSequenceLocs[0]) {
8388 if (I.isCFIInstruction())
8389 CFICount++;
8390 }
8391
8392 // We compare the number of found CFI Instructions to the number of CFI
8393 // instructions in the parent function for each candidate. We must check this
8394 // since if we outline one of the CFI instructions in a function, we have to
8395 // outline them all for correctness. If we do not, the address offsets will be
8396 // incorrect between the two sections of the program.
8397 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8398 std::vector<MCCFIInstruction> CFIInstructions =
8399 C.getMF()->getFrameInstructions();
8400
8401 if (CFICount > 0 && CFICount != CFIInstructions.size())
8402 return std::nullopt;
8403 }
8404
8405 // Returns true if an instruction is safe to fix up, false otherwise.
8406 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8407 if (MI.isCall())
8408 return true;
8409
8410 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8411 !MI.readsRegister(AArch64::SP, &TRI))
8412 return true;
8413
8414 // Any modification of SP will break our code to save/restore LR.
8415 // FIXME: We could handle some instructions which add a constant
8416 // offset to SP, with a bit more work.
8417 if (MI.modifiesRegister(AArch64::SP, &TRI))
8418 return false;
8419
8420 // At this point, we have a stack instruction that we might need to
8421 // fix up. We'll handle it if it's a load or store.
8422 if (MI.mayLoadOrStore()) {
8423 const MachineOperand *Base; // Filled with the base operand of MI.
8424 int64_t Offset; // Filled with the offset of MI.
8425 bool OffsetIsScalable;
8426
8427 // Does it allow us to offset the base operand and is the base the
8428 // register SP?
8429 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8430 !Base->isReg() || Base->getReg() != AArch64::SP)
8431 return false;
8432
8433 // Fix-up code below assumes bytes.
8434 if (OffsetIsScalable)
8435 return false;
8436
8437 // Find the minimum/maximum offset for this instruction and check
8438 // if fixing it up would be in range.
8439 int64_t MinOffset,
8440 MaxOffset; // Unscaled offsets for the instruction.
8441 // The scale to multiply the offsets by.
8442 TypeSize Scale(0U, false), DummyWidth(0U, false);
8443 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8444
8445 Offset += 16; // Update the offset to what it would be if we outlined.
8446 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8447 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8448 return false;
8449
8450 // It's in range, so we can outline it.
8451 return true;
8452 }
8453
8454 // FIXME: Add handling for instructions like "add x0, sp, #8".
8455
8456 // We can't fix it up, so don't outline it.
8457 return false;
8458 };
8459
8460 // True if it's possible to fix up each stack instruction in this sequence.
8461 // Important for frames/call variants that modify the stack.
8462 bool AllStackInstrsSafe = llvm::all_of(FirstCand, IsSafeToFixup);
8463
8464 // If the last instruction in any candidate is a terminator, then we should
8465 // tail call all of the candidates.
8466 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8467 FrameID = MachineOutlinerTailCall;
8468 NumBytesToCreateFrame = 0;
8469 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8470 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8471 }
8472
8473 else if (LastInstrOpcode == AArch64::BL ||
8474 ((LastInstrOpcode == AArch64::BLR ||
8475 LastInstrOpcode == AArch64::BLRNoIP) &&
8476 !HasBTI)) {
8477 // FIXME: Do we need to check if the code after this uses the value of LR?
8478 FrameID = MachineOutlinerThunk;
8479 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8480 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8481 }
8482
8483 else {
8484 // We need to decide how to emit calls + frames. We can always emit the same
8485 // frame if we don't need to save to the stack. If we have to save to the
8486 // stack, then we need a different frame.
8487 unsigned NumBytesNoStackCalls = 0;
8488 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8489
8490 // Check if we have to save LR.
8491 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8492 bool LRAvailable =
8493 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8494 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8495 : true;
8496 // If we have a noreturn caller, then we're going to be conservative and
8497 // say that we have to save LR. If we don't have a ret at the end of the
8498 // block, then we can't reason about liveness accurately.
8499 //
8500 // FIXME: We can probably do better than always disabling this in
8501 // noreturn functions by fixing up the liveness info.
8502 bool IsNoReturn =
8503 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8504
8505 // Is LR available? If so, we don't need a save.
8506 if (LRAvailable && !IsNoReturn) {
8507 NumBytesNoStackCalls += 4;
8508 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8509 CandidatesWithoutStackFixups.push_back(C);
8510 }
8511
8512 // Is an unused register available? If so, we won't modify the stack, so
8513 // we can outline with the same frame type as those that don't save LR.
8514 else if (findRegisterToSaveLRTo(C)) {
8515 NumBytesNoStackCalls += 12;
8516 C.setCallInfo(MachineOutlinerRegSave, 12);
8517 CandidatesWithoutStackFixups.push_back(C);
8518 }
8519
8520 // Is SP used in the sequence at all? If not, we don't have to modify
8521 // the stack, so we are guaranteed to get the same frame.
8522 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8523 NumBytesNoStackCalls += 12;
8524 C.setCallInfo(MachineOutlinerDefault, 12);
8525 CandidatesWithoutStackFixups.push_back(C);
8526 }
8527
8528 // If we outline this, we need to modify the stack. Pretend we don't
8529 // outline this by saving all of its bytes.
8530 else {
8531 NumBytesNoStackCalls += SequenceSize;
8532 }
8533 }
8534
8535 // If there are no places where we have to save LR, then note that we
8536 // don't have to update the stack. Otherwise, give every candidate the
8537 // default call type, as long as it's safe to do so.
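// A MachineOutlinerDefault call costs 12 bytes (save LR, BL, restore LR), so
// the check below prefers the no-stack-fixup candidates whenever their
// combined call cost does not exceed pricing every candidate at that default
// rate.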
8538 if (!AllStackInstrsSafe ||
8539 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8540 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8541 FrameID = MachineOutlinerNoLRSave;
8542 } else {
8543 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8544
8545 // Bugzilla ID: 46767
8546 // TODO: Check if fixing up the stack more than once is safe so we can
8547 // outline these.
8548 //
8549 // An outline resulting in a caller that requires stack fixups at the
8550 // callsite to a callee that also requires stack fixups can happen when
8551 // there are no available registers at the candidate callsite for a
8552 // candidate that itself also has calls.
8553 //
8554 // In other words, if function_containing_sequence in the following pseudo
8555 // assembly requires that we save LR at the point of the call, but there
8556 // are no available registers, then we save using SP and as a result the
8557 // SP offsets require stack fixups by multiples of 16.
8558 //
8559 // function_containing_sequence:
8560 // ...
8561 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8562 // call OUTLINED_FUNCTION_N
8563 // restore LR from SP
8564 // ...
8565 //
8566 // OUTLINED_FUNCTION_N:
8567 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8568 // ...
8569 // bl foo
8570 // restore LR from SP
8571 // ret
8572 //
8573 // Because the code to handle more than one stack fixup does not
8574 // currently have the proper checks for legality, these cases will assert
8575 // in the AArch64 MachineOutliner. This is because the code to do this
8576 // needs more hardening, testing, better checks that generated code is
8577 // legal, etc., and because it is only verified to handle a single pass of
8578 // stack fixup.
8579 //
8580 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8581 // these cases until they are known to be handled. Bugzilla 46767 is
8582 // referenced in comments at the assert site.
8583 //
8584 // To avoid asserting (or generating non-legal code on noassert builds)
8585 // we remove all candidates which would need more than one stack fixup by
8586 // pruning the cases where the candidate has calls while also having no
8587 // available LR and having no available general purpose registers to copy
8588 // LR to (ie one extra stack save/restore).
8589 //
8590 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8591 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8592 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8593 return (llvm::any_of(C, IsCall)) &&
8594 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8595 !findRegisterToSaveLRTo(C));
8596 });
8597 }
8598 }
8599
8600 // If we dropped all of the candidates, bail out here.
8601 if (RepeatedSequenceLocs.size() < 2) {
8602 RepeatedSequenceLocs.clear();
8603 return std::nullopt;
8604 }
8605 }
8606
8607 // Does every candidate's MBB contain a call? If so, then we might have a call
8608 // in the range.
8609 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8610 // Check if the range contains a call. These require a save + restore of the
8611 // link register.
8612 bool ModStackToSaveLR = false;
8613 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8614 [](const MachineInstr &MI) { return MI.isCall(); }))
8615 ModStackToSaveLR = true;
8616
8617 // Handle the last instruction separately. If this is a tail call, then the
8618 // last instruction is a call. We don't want to save + restore in this case.
8619 // However, it could be possible that the last instruction is a call without
8620 // it being valid to tail call this sequence. We should consider this as
8621 // well.
8622 else if (FrameID != MachineOutlinerThunk &&
8623 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8624 ModStackToSaveLR = true;
8625
8626 if (ModStackToSaveLR) {
8627 // We can't fix up the stack. Bail out.
8628 if (!AllStackInstrsSafe) {
8629 RepeatedSequenceLocs.clear();
8630 return std::nullopt;
8631 }
8632
8633 // Save + restore LR.
8634 NumBytesToCreateFrame += 8;
8635 }
8636 }
8637
8638 // If we have CFI instructions, we can only outline if the outlined section
8639 // can be a tail call
8640 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8641 return std::nullopt;
8642
8643 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8644 NumBytesToCreateFrame, FrameID);
8645}
8646
8648 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8649 // If a bunch of candidates reach this point they must agree on their return
8650 // address signing. It is therefore enough to just consider the signing
8651 // behaviour of one of them
8652 const auto &CFn = Candidates.front().getMF()->getFunction();
8653
8654 // Since all candidates belong to the same module, just copy the
8655 // function-level attributes of an arbitrary function.
8656 if (CFn.hasFnAttribute("sign-return-address"))
8657 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8658 if (CFn.hasFnAttribute("sign-return-address-key"))
8659 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8660
8661 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8662}
8663
8665 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8666 const Function &F = MF.getFunction();
8667
8668 // Can F be deduplicated by the linker? If it can, don't outline from it.
8669 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8670 return false;
8671
8672 // Don't outline from functions with section markings; the program could
8673 // expect that all the code is in the named section.
8674 // FIXME: Allow outlining from multiple functions with the same section
8675 // marking.
8676 if (F.hasSection())
8677 return false;
8678
8679 // Outlining from functions with redzones is unsafe since the outliner may
8680 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8681 // outline from it.
8683 if (!AFI || AFI->hasRedZone().value_or(true))
8684 return false;
8685
8686 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8688 return false;
8689
8690 // It's safe to outline from MF.
8691 return true;
8692}
8693
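// Partition MBB into the sub-ranges that are safe to outline from, i.e. where
// X16, X17 and NZCV are all dead (see the comment below), and record in Flags
// whether the block contains calls and whether LR is unavailable somewhere in
// an outlinable range.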
8696 unsigned &Flags) const {
8698 "Must track liveness!");
8700 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8701 Ranges;
8702 // According to the AArch64 Procedure Call Standard, the following are
8703 // undefined on entry/exit from a function call:
8704 //
8705 // * Registers x16, x17, (and thus w16, w17)
8706 // * Condition codes (and thus the NZCV register)
8707 //
8708 // If any of these registers are used inside or live across an outlined
8709 // function, then they may be modified later, either by the compiler or
8710 // some other tool (like the linker).
8711 //
8712 // To avoid outlining in these situations, partition each block into ranges
8713 // where these registers are dead. We will only outline from those ranges.
8715 auto AreAllUnsafeRegsDead = [&LRU]() {
8716 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8717 LRU.available(AArch64::NZCV);
8718 };
8719
8720 // We need to know if LR is live across an outlining boundary later on in
8721 // order to decide how we'll create the outlined call, frame, etc.
8722 //
8723 // It's pretty expensive to check this for *every candidate* within a block.
8724 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8725 // to compute liveness from the end of the block for O(n) candidates within
8726 // the block.
8727 //
8728 // So, to improve the average case, let's keep track of liveness from the end
8729 // of the block to the beginning of *every outlinable range*. If we know that
8730 // LR is available in every range we could outline from, then we know that
8731 // we don't need to check liveness for any candidate within that range.
8732 bool LRAvailableEverywhere = true;
8733 // Compute liveness bottom-up.
8734 LRU.addLiveOuts(MBB);
8735 // Update flags that require info about the entire MBB.
8736 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8737 if (MI.isCall() && !MI.isTerminator())
8738 Flags |= MachineOutlinerMBBFlags::HasCalls;
8739 };
8740 // Range: [RangeBegin, RangeEnd)
8741 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8742 unsigned RangeLen;
8743 auto CreateNewRangeStartingAt =
8744 [&RangeBegin, &RangeEnd,
8745 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8746 RangeBegin = NewBegin;
8747 RangeEnd = std::next(RangeBegin);
8748 RangeLen = 0;
8749 };
8750 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8751 // At least one unsafe register is not dead. We do not want to outline at
8752 // this point. If it is long enough to outline from, save the range
8753 // [RangeBegin, RangeEnd).
8754 if (RangeLen > 1)
8755 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8756 };
8757 // Find the first point where all unsafe registers are dead.
8758 // FIND: <safe instr> <-- end of first potential range
8759 // SKIP: <unsafe def>
8760 // SKIP: ... everything between ...
8761 // SKIP: <unsafe use>
8762 auto FirstPossibleEndPt = MBB.instr_rbegin();
8763 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8764 LRU.stepBackward(*FirstPossibleEndPt);
8765 // Update flags that impact how we outline across the entire block,
8766 // regardless of safety.
8767 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8768 if (AreAllUnsafeRegsDead())
8769 break;
8770 }
8771 // If we exhausted the entire block, we have no safe ranges to outline.
8772 if (FirstPossibleEndPt == MBB.instr_rend())
8773 return Ranges;
8774 // Current range.
8775 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8776 // FirstPossibleEndPt points to the first place where all unsafe registers
8777 // are dead (if there is any such point). Begin partitioning the MBB into
8778 // ranges.
8779 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8780 LRU.stepBackward(MI);
8781 UpdateWholeMBBFlags(MI);
8782 if (!AreAllUnsafeRegsDead()) {
8783 SaveRangeIfNonEmpty();
8784 CreateNewRangeStartingAt(MI.getIterator());
8785 continue;
8786 }
8787 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8788 RangeBegin = MI.getIterator();
8789 ++RangeLen;
8790 }
8791 // Above loop misses the last (or only) range. If we are still safe, then
8792 // let's save the range.
8793 if (AreAllUnsafeRegsDead())
8794 SaveRangeIfNonEmpty();
8795 if (Ranges.empty())
8796 return Ranges;
8797 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
8798 // the order.
8799 std::reverse(Ranges.begin(), Ranges.end());
8800 // If there is at least one outlinable range where LR is unavailable
8801 // somewhere, remember that.
8802 if (!LRAvailableEverywhere)
8803 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8804 return Ranges;
8805}
8806
8809 unsigned Flags) const {
8810 MachineInstr &MI = *MIT;
8811 MachineBasicBlock *MBB = MI.getParent();
8812 MachineFunction *MF = MBB->getParent();
8814
8815 // Don't outline anything used for return address signing. The outlined
8816 // function will get signed later if needed
8817 switch (MI.getOpcode()) {
8818 case AArch64::PACM:
8819 case AArch64::PACIASP:
8820 case AArch64::PACIBSP:
8821 case AArch64::PACIASPPC:
8822 case AArch64::PACIBSPPC:
8823 case AArch64::AUTIASP:
8824 case AArch64::AUTIBSP:
8825 case AArch64::AUTIASPPCi:
8826 case AArch64::AUTIASPPCr:
8827 case AArch64::AUTIBSPPCi:
8828 case AArch64::AUTIBSPPCr:
8829 case AArch64::RETAA:
8830 case AArch64::RETAB:
8831 case AArch64::RETAASPPCi:
8832 case AArch64::RETAASPPCr:
8833 case AArch64::RETABSPPCi:
8834 case AArch64::RETABSPPCr:
8835 case AArch64::EMITBKEY:
8836 case AArch64::PAUTH_PROLOGUE:
8837 case AArch64::PAUTH_EPILOGUE:
8839 }
8840
8841 // Don't outline LOHs.
8842 if (FuncInfo->getLOHRelated().count(&MI))
8844
8845 // We can only outline these if we will tail call the outlined function, or
8846 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8847 // in a tail call.
8848 //
8849 // FIXME: If the proper fixups for the offset are implemented, this should be
8850 // possible.
8851 if (MI.isCFIInstruction())
8853
8854 // Is this a terminator for a basic block?
8855 if (MI.isTerminator())
8856 // TargetInstrInfo::getOutliningType has already filtered out anything
8857 // that would break this, so we can allow it here.
8859
8860 // Make sure none of the operands are un-outlinable.
8861 for (const MachineOperand &MOP : MI.operands()) {
8862 // A check preventing CFI indices was here before, but only CFI
8863 // instructions should have those.
8864 assert(!MOP.isCFIIndex());
8865
8866 // If it uses LR or W30 explicitly, then don't touch it.
8867 if (MOP.isReg() && !MOP.isImplicit() &&
8868 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8870 }
8871
8872 // Special cases for instructions that can always be outlined, but will fail
8873 // the later tests, e.g. ADRPs, which are PC-relative, use LR, but can always
8874 // be outlined because they don't require a *specific* value to be in LR.
8875 if (MI.getOpcode() == AArch64::ADRP)
8877
8878 // If MI is a call we might be able to outline it. We don't want to outline
8879 // any calls that rely on the position of items on the stack. When we outline
8880 // something containing a call, we have to emit a save and restore of LR in
8881 // the outlined function. Currently, this always happens by saving LR to the
8882 // stack. Thus, if we outline, say, half the parameters for a function call
8883 // plus the call, then we'll break the callee's expectations for the layout
8884 // of the stack.
8885 //
8886 // FIXME: Allow calls to functions which construct a stack frame, as long
8887 // as they don't access arguments on the stack.
8888 // FIXME: Figure out some way to analyze functions defined in other modules.
8889 // We should be able to compute the memory usage based on the IR calling
8890 // convention, even if we can't see the definition.
8891 if (MI.isCall()) {
8892 // Get the function associated with the call. Look at each operand and find
8893 // the one that represents the callee and get its name.
8894 const Function *Callee = nullptr;
8895 for (const MachineOperand &MOP : MI.operands()) {
8896 if (MOP.isGlobal()) {
8897 Callee = dyn_cast<Function>(MOP.getGlobal());
8898 break;
8899 }
8900 }
8901
8902 // Never outline calls to mcount. There isn't any rule that would require
8903 // this, but the Linux kernel's "ftrace" feature depends on it.
8904 if (Callee && Callee->getName() == "\01_mcount")
8906
8907 // If we don't know anything about the callee, assume it depends on the
8908 // stack layout of the caller. In that case, it's only legal to outline
8909 // as a tail-call. Explicitly list the call instructions we know about so we
8910 // don't get unexpected results with call pseudo-instructions.
8911 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8912 if (MI.getOpcode() == AArch64::BLR ||
8913 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8914 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8915
8916 if (!Callee)
8917 return UnknownCallOutlineType;
8918
8919 // We have a function we have information about. Check if it's something we
8920 // can safely outline.
8921 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
8922
8923 // We don't know what's going on with the callee at all. Don't touch it.
8924 if (!CalleeMF)
8925 return UnknownCallOutlineType;
8926
8927 // Check if we know anything about the callee saves on the function. If we
8928 // don't, then don't touch it, since that implies that we haven't
8929 // computed anything about its stack frame yet.
8930 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8931 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8932 MFI.getNumObjects() > 0)
8933 return UnknownCallOutlineType;
8934
8935 // At this point, we can say that CalleeMF ought to not pass anything on the
8936 // stack. Therefore, we can outline it.
8938 }
8939
8940 // Don't touch the link register or W30.
8941 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8942 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8943 return outliner::InstrType::Illegal;
8944
8945 // Don't outline BTI instructions, because that will prevent the outlining
8946 // site from being indirectly callable.
8947 if (hasBTISemantics(MI))
8948 return outliner::InstrType::Illegal;
8949
8950 return outliner::InstrType::Legal;
8951 }
8952
8953void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8954 for (MachineInstr &MI : MBB) {
8955 const MachineOperand *Base;
8956 TypeSize Width(0, false);
8957 int64_t Offset;
8958 bool OffsetIsScalable;
8959
8960 // Is this a load or store with an immediate offset with SP as the base?
8961 if (!MI.mayLoadOrStore() ||
8962 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
8963 &RI) ||
8964 (Base->isReg() && Base->getReg() != AArch64::SP))
8965 continue;
8966
8967 // It is, so we have to fix it up.
8968 TypeSize Scale(0U, false);
8969 int64_t Dummy1, Dummy2;
8970
8971 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
8972 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
8973 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
8974 assert(Scale != 0 && "Unexpected opcode!");
8975 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
8976
8977 // We've pushed the return address to the stack, so add 16 to the offset.
8978 // This is safe, since we already checked if it would overflow when we
8979 // checked if this instruction was legal to outline.
8980 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
8981 StackOffsetOperand.setImm(NewImm);
8982 }
8983}
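
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// fixupPostOutline rescales every SP-relative immediate because the outlined
// frame pushes LR with "str lr, [sp, #-16]!", so every stack access must land
// 16 bytes higher. A minimal standalone model of that arithmetic, assuming a
// hypothetical access whose immediate is scaled by 8 bytes:
#include <cstdint>
constexpr int64_t fixedUpScaledImm(int64_t ByteOffset, int64_t Scale) {
  return (ByteOffset + 16) / Scale; // mirrors (Offset + 16) / Scale.getFixedValue()
}
// ldr x0, [sp, #8] has byte offset 8 and scale 8 (encoded immediate 1); after
// the LR spill it must read [sp, #24], i.e. encoded immediate 3.
static_assert(fixedUpScaledImm(8, 8) == 3, "SP offsets move up by 16 bytes");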
8984
8985 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
8986 const AArch64InstrInfo *TII,
8987 bool ShouldSignReturnAddr) {
8988 if (!ShouldSignReturnAddr)
8989 return;
8990
8991 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
8992 .setMIFlag(MachineInstr::FrameSetup);
8993 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
8994 TII->get(AArch64::PAUTH_EPILOGUE))
8995 .setMIFlag(MachineInstr::FrameDestroy);
8996 }
8997
8998 void AArch64InstrInfo::buildOutlinedFrame(
8999 MachineBasicBlock &MBB, MachineFunction &MF,
9000 const outliner::OutlinedFunction &OF) const {
9001
9002 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9003
9004 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9005 FI->setOutliningStyle("Tail Call");
9006 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9007 // For thunk outlining, rewrite the last instruction from a call to a
9008 // tail-call.
9009 MachineInstr *Call = &*--MBB.instr_end();
9010 unsigned TailOpcode;
9011 if (Call->getOpcode() == AArch64::BL) {
9012 TailOpcode = AArch64::TCRETURNdi;
9013 } else {
9014 assert(Call->getOpcode() == AArch64::BLR ||
9015 Call->getOpcode() == AArch64::BLRNoIP);
9016 TailOpcode = AArch64::TCRETURNriALL;
9017 }
9018 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9019 .add(Call->getOperand(0))
9020 .addImm(0);
9021 MBB.insert(MBB.end(), TC);
9022 Call->eraseFromParent();
9023
9024 FI->setOutliningStyle("Thunk");
9025 }
9026
9027 bool IsLeafFunction = true;
9028
9029 // Is there a call in the outlined range?
9030 auto IsNonTailCall = [](const MachineInstr &MI) {
9031 return MI.isCall() && !MI.isReturn();
9032 };
9033
9034 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9035 // Fix up the instructions in the range, since we're going to modify the
9036 // stack.
9037
9038 // Bugzilla ID: 46767
9039 // TODO: Check if fixing up twice is safe so we can outline these.
9040 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9041 "Can only fix up stack references once");
9042 fixupPostOutline(MBB);
9043
9044 IsLeafFunction = false;
9045
9046 // LR has to be a live in so that we can save it.
9047 if (!MBB.isLiveIn(AArch64::LR))
9048 MBB.addLiveIn(AArch64::LR);
9049
9050 MachineBasicBlock::iterator It = MBB.begin();
9051 MachineBasicBlock::iterator Et = MBB.end();
9052
9053 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9054 OF.FrameConstructionID == MachineOutlinerThunk)
9055 Et = std::prev(MBB.end());
9056
9057 // Insert a save before the outlined region
9058 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9059 .addReg(AArch64::SP, RegState::Define)
9060 .addReg(AArch64::LR)
9061 .addReg(AArch64::SP)
9062 .addImm(-16);
9063 It = MBB.insert(It, STRXpre);
9064
9065 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9066 const TargetSubtargetInfo &STI = MF.getSubtarget();
9067 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9068 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9069
9070 // Add a CFI saying the stack was moved 16 B down.
9071 int64_t StackPosEntry =
9072 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9073 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9074 .addCFIIndex(StackPosEntry)
9075 .setMIFlag(MachineInstr::FrameSetup);
9076
9077 // Add a CFI saying that the LR that we want to find is now 16 B higher
9078 // than before.
9079 int64_t LRPosEntry = MF.addFrameInst(
9080 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9081 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9082 .addCFIIndex(LRPosEntry)
9083 .setMIFlag(MachineInstr::FrameSetup);
9084 }
9085
9086 // Insert a restore before the terminator for the function.
9087 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9088 .addReg(AArch64::SP, RegState::Define)
9089 .addReg(AArch64::LR, RegState::Define)
9090 .addReg(AArch64::SP)
9091 .addImm(16);
9092 Et = MBB.insert(Et, LDRXpost);
9093 }
9094
9095 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9096
9097 // If this is a tail call outlined function, then there's already a return.
9098 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9099 OF.FrameConstructionID == MachineOutlinerThunk) {
9100 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9101 return;
9102 }
9103
9104 // It's not a tail call, so we have to insert the return ourselves.
9105
9106 // LR has to be a live in so that we can return to it.
9107 if (!MBB.isLiveIn(AArch64::LR))
9108 MBB.addLiveIn(AArch64::LR);
9109
9110 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9111 .addReg(AArch64::LR);
9112 MBB.insert(MBB.end(), ret);
9113
9114 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9115
9116 FI->setOutliningStyle("Function");
9117
9118 // Did we have to modify the stack by saving the link register?
9119 if (OF.FrameConstructionID != MachineOutlinerDefault)
9120 return;
9121
9122 // We modified the stack.
9123 // Walk over the basic block and fix up all the stack accesses.
9124 fixupPostOutline(MBB);
9125}
9126
9127 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9128 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9129 MachineFunction &MF, outliner::Candidate &C) const {
9130
9131 // Are we tail calling?
9132 if (C.CallConstructionID == MachineOutlinerTailCall) {
9133 // If yes, then we can just branch to the label.
9134 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9135 .addGlobalAddress(M.getNamedValue(MF.getName()))
9136 .addImm(0));
9137 return It;
9138 }
9139
9140 // Are we saving the link register?
9141 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9142 C.CallConstructionID == MachineOutlinerThunk) {
9143 // No, so just insert the call.
9144 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9145 .addGlobalAddress(M.getNamedValue(MF.getName())));
9146 return It;
9147 }
9148
9149 // We want to return the spot where we inserted the call.
9150 MachineBasicBlock::iterator CallPt;
9151
9152 // Instructions for saving and restoring LR around the call instruction we're
9153 // going to insert.
9154 MachineInstr *Save;
9155 MachineInstr *Restore;
9156 // Can we save to a register?
9157 if (C.CallConstructionID == MachineOutlinerRegSave) {
9158 // FIXME: This logic should be sunk into a target-specific interface so that
9159 // we don't have to recompute the register.
9160 Register Reg = findRegisterToSaveLRTo(C);
9161 assert(Reg && "No callee-saved register available?");
9162
9163 // LR has to be a live in so that we can save it.
9164 if (!MBB.isLiveIn(AArch64::LR))
9165 MBB.addLiveIn(AArch64::LR);
9166
9167 // Save and restore LR from Reg.
9168 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9169 .addReg(AArch64::XZR)
9170 .addReg(AArch64::LR)
9171 .addImm(0);
9172 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9173 .addReg(AArch64::XZR)
9174 .addReg(Reg)
9175 .addImm(0);
9176 } else {
9177 // We have the default case. Save and restore from SP.
9178 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9179 .addReg(AArch64::SP, RegState::Define)
9180 .addReg(AArch64::LR)
9181 .addReg(AArch64::SP)
9182 .addImm(-16);
9183 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9184 .addReg(AArch64::SP, RegState::Define)
9185 .addReg(AArch64::LR, RegState::Define)
9186 .addReg(AArch64::SP)
9187 .addImm(16);
9188 }
9189
9190 It = MBB.insert(It, Save);
9191 It++;
9192
9193 // Insert the call.
9194 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9195 .addGlobalAddress(M.getNamedValue(MF.getName())));
9196 CallPt = It;
9197 It++;
9198
9199 It = MBB.insert(It, Restore);
9200 return CallPt;
9201}
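
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// The call sequences insertOutlinedCall builds above, written as the
// equivalent assembly. "OUTLINED_FUNCTION" stands for
// M.getNamedValue(MF.getName()) and "xN" for the callee-saved register
// picked by findRegisterToSaveLRTo(C).
static const char *OutlinedCallShapes[] = {
    // MachineOutlinerTailCall: TCRETURNdi, i.e. just branch to the function.
    "b   OUTLINED_FUNCTION",
    // MachineOutlinerNoLRSave / MachineOutlinerThunk: a plain call.
    "bl  OUTLINED_FUNCTION",
    // MachineOutlinerRegSave: ORRXrs xN, xzr, lr is the canonical mov alias,
    // so LR is parked in a spare register around the call.
    "mov xN, lr;  bl OUTLINED_FUNCTION;  mov lr, xN",
    // MachineOutlinerDefault: STRXpre/LDRXpost spill and reload LR on the stack.
    "str lr, [sp, #-16]!;  bl OUTLINED_FUNCTION;  ldr lr, [sp], #16",
};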
9202
9203 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
9204 MachineFunction &MF) const {
9205 return MF.getFunction().hasMinSize();
9206}
9207
9208 void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9209 MachineBasicBlock::iterator Iter,
9210 DebugLoc &DL,
9211 bool AllowSideEffects) const {
9212 const MachineFunction &MF = *MBB.getParent();
9213 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9214 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9215
9216 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9217 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9218 } else if (STI.hasSVE()) {
9219 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9220 .addImm(0)
9221 .addImm(0);
9222 } else {
9223 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9224 .addImm(0);
9225 }
9226}
9227
9228std::optional<DestSourcePair>
9229 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9230
9231 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9232 // and zero immediate operands used as an alias for mov instruction.
9233 if (MI.getOpcode() == AArch64::ORRWrs &&
9234 MI.getOperand(1).getReg() == AArch64::WZR &&
9235 MI.getOperand(3).getImm() == 0x0 &&
9236 // Check that the w->w move is not a zero-extending w->x mov.
9237 (!MI.getOperand(0).getReg().isVirtual() ||
9238 MI.getOperand(0).getSubReg() == 0) &&
9239 (!MI.getOperand(0).getReg().isPhysical() ||
9240 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9241 AArch64::X0) == -1))
9242 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9243
9244 if (MI.getOpcode() == AArch64::ORRXrs &&
9245 MI.getOperand(1).getReg() == AArch64::XZR &&
9246 MI.getOperand(3).getImm() == 0x0)
9247 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9248
9249 return std::nullopt;
9250}
9251
9252std::optional<DestSourcePair>
9253 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9254 if (MI.getOpcode() == AArch64::ORRWrs &&
9255 MI.getOperand(1).getReg() == AArch64::WZR &&
9256 MI.getOperand(3).getImm() == 0x0)
9257 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9258 return std::nullopt;
9259}
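
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// Why the two helpers above check operands 1 and 3 and return {0, 2}:
// "mov w0, w1" is the alias for "orr w0, wzr, w1, lsl #0", so an ORRWrs/ORRXrs
// is only a plain register move when the first source is the zero register and
// the shift immediate is zero. (Enum names are illustrative only.)
enum OrrMovAliasOperand {
  OrrDst = 0,      // Wd/Xd, returned as DestSourcePair::Destination
  OrrZeroReg = 1,  // must be WZR/XZR for the alias to hold
  OrrSrc = 2,      // Wm/Xm, returned as DestSourcePair::Source
  OrrShiftImm = 3  // must be 0; a shifted operand is more than a copy
};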
9260
9261std::optional<RegImmPair>
9262 AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9263 int Sign = 1;
9264 int64_t Offset = 0;
9265
9266 // TODO: Handle cases where Reg is a super- or sub-register of the
9267 // destination register.
9268 const MachineOperand &Op0 = MI.getOperand(0);
9269 if (!Op0.isReg() || Reg != Op0.getReg())
9270 return std::nullopt;
9271
9272 switch (MI.getOpcode()) {
9273 default:
9274 return std::nullopt;
9275 case AArch64::SUBWri:
9276 case AArch64::SUBXri:
9277 case AArch64::SUBSWri:
9278 case AArch64::SUBSXri:
9279 Sign *= -1;
9280 [[fallthrough]];
9281 case AArch64::ADDSWri:
9282 case AArch64::ADDSXri:
9283 case AArch64::ADDWri:
9284 case AArch64::ADDXri: {
9285 // TODO: Third operand can be global address (usually some string).
9286 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9287 !MI.getOperand(2).isImm())
9288 return std::nullopt;
9289 int Shift = MI.getOperand(3).getImm();
9290 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9291 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9292 }
9293 }
9294 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9295}
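
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// The value isAddImmediate derives for shifted ADD/SUB immediates, as a tiny
// standalone model of Sign * (Imm << Shift):
#include <cstdint>
constexpr int64_t addImmOffset(int64_t Imm, int Shift, bool IsSub) {
  return (IsSub ? -1 : 1) * (Imm << Shift);
}
// add x0, x1, #3, lsl #12  ->  RegImmPair{x1, +12288}
static_assert(addImmOffset(3, 12, /*IsSub=*/false) == 12288, "shift is 0 or 12");
// sub x0, x1, #16          ->  RegImmPair{x1, -16}
static_assert(addImmOffset(16, 0, /*IsSub=*/true) == -16, "SUB negates the offset");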
9296
9297/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9298/// the destination register then, if possible, describe the value in terms of
9299/// the source register.
9300static std::optional<ParamLoadedValue>
9301 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
9302 const TargetInstrInfo *TII,
9303 const TargetRegisterInfo *TRI) {
9304 auto DestSrc = TII->isCopyLikeInstr(MI);
9305 if (!DestSrc)
9306 return std::nullopt;
9307
9308 Register DestReg = DestSrc->Destination->getReg();
9309 Register SrcReg = DestSrc->Source->getReg();
9310
9311 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9312
9313 // If the described register is the destination, just return the source.
9314 if (DestReg == DescribedReg)
9315 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9316
9317 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9318 if (MI.getOpcode() == AArch64::ORRWrs &&
9319 TRI->isSuperRegister(DestReg, DescribedReg))
9320 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9321
9322 // We may need to describe the lower part of a ORRXrs move.
9323 if (MI.getOpcode() == AArch64::ORRXrs &&
9324 TRI->isSubRegister(DestReg, DescribedReg)) {
9325 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9326 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9327 }
9328
9329 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9330 "Unhandled ORR[XW]rs copy case");
9331
9332 return std::nullopt;
9333}
9334
9335 bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9336 // Functions cannot be split to different sections on AArch64 if they have
9337 // a red zone. This is because relaxing a cross-section branch may require
9338 // incrementing the stack pointer to spill a register, which would overwrite
9339 // the red zone.
9340 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9341 return false;
9342
9343 return TargetInstrInfo::isFunctionSafeToSplit(MF);
9344 }
9345
9346 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9347 const MachineBasicBlock &MBB) const {
9348 // Asm Goto blocks can contain conditional branches to goto labels, which can
9349 // get moved out of range of the branch instruction.
9350 auto isAsmGoto = [](const MachineInstr &MI) {
9351 return MI.getOpcode() == AArch64::INLINEASM_BR;
9352 };
9353 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9354 return false;
9355
9356 // Because jump tables are label-relative instead of table-relative, they all
9357 // must be in the same section or relocation fixup handling will fail.
9358
9359 // Check if MBB is a jump table target
9360 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
9361 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9362 return llvm::is_contained(JTE.MBBs, &MBB);
9363 };
9364 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9365 return false;
9366
9367 // Check if MBB contains a jump table lookup
9368 for (const MachineInstr &MI : MBB) {
9369 switch (MI.getOpcode()) {
9370 case TargetOpcode::G_BRJT:
9371 case AArch64::JumpTableDest32:
9372 case AArch64::JumpTableDest16:
9373 case AArch64::JumpTableDest8:
9374 return false;
9375 default:
9376 continue;
9377 }
9378 }
9379
9380 // MBB isn't a special case, so it's safe to be split to the cold section.
9381 return true;
9382}
9383
9384std::optional<ParamLoadedValue>
9386 Register Reg) const {
9387 const MachineFunction *MF = MI.getMF();
9388 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9389 switch (MI.getOpcode()) {
9390 case AArch64::MOVZWi:
9391 case AArch64::MOVZXi: {
9392 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9393 // 64-bit parameters, so we need to consider super-registers.
9394 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9395 return std::nullopt;
9396
9397 if (!MI.getOperand(1).isImm())
9398 return std::nullopt;
9399 int64_t Immediate = MI.getOperand(1).getImm();
9400 int Shift = MI.getOperand(2).getImm();
9401 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9402 nullptr);
9403 }
9404 case AArch64::ORRWrs:
9405 case AArch64::ORRXrs:
9406 return describeORRLoadedValue(MI, Reg, this, TRI);
9407 }
9408
9409 return TargetInstrInfo::describeLoadedValue(MI, Reg);
9410 }
9411
9412 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9413 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9414 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9415 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9416 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9417
9418 // Anyexts are nops.
9419 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9420 return true;
9421
9422 Register DefReg = ExtMI.getOperand(0).getReg();
9423 if (!MRI.hasOneNonDBGUse(DefReg))
9424 return false;
9425
9426 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9427 // addressing mode.
9428 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9429 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9430}
9431
9432 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9433 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9434}
9435
9436bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9437 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9438}
9439
9440bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9441 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9442}
9443
9444unsigned int
9445 AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9446 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9447}
9448
9449bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9450 unsigned Scale) const {
9451 if (Offset && Scale)
9452 return false;
9453
9454 // Check Reg + Imm
9455 if (!Scale) {
9456 // 9-bit signed offset
9457 if (isInt<9>(Offset))
9458 return true;
9459
9460 // 12-bit unsigned offset
9461 unsigned Shift = Log2_64(NumBytes);
9462 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9463 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9464 (Offset >> Shift) << Shift == Offset)
9465 return true;
9466 return false;
9467 }
9468
9469 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9470 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9471}
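
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// The register+immediate rule above, specialised for a hypothetical 8-byte
// access (NumBytes == 8, Scale == 0): legal if the offset fits a signed 9-bit
// byte offset, or is a positive multiple of 8 whose scaled value fits 12 bits.
#include <cstdint>
constexpr bool isLegalRegImm8(int64_t Offset) {
  constexpr int64_t NumBytes = 8;
  if (Offset >= -256 && Offset <= 255)           // isInt<9>(Offset)
    return true;
  return Offset > 0 && Offset % NumBytes == 0 && // multiple of the access size
         (Offset / NumBytes) <= (1LL << 12) - 1; // 12-bit unsigned scaled form
}
static_assert(isLegalRegImm8(-256), "9-bit signed offset");
static_assert(isLegalRegImm8(32760), "4095 * 8 is the largest scaled offset");
static_assert(!isLegalRegImm8(260), "neither 9-bit nor a multiple of 8");
static_assert(!isLegalRegImm8(32768), "4096 * 8 overflows the 12-bit field");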
9472
9473 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9474 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9475 return AArch64::BLRNoIP;
9476 else
9477 return AArch64::BLR;
9478}
9479
9480 MachineBasicBlock::iterator
9481 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9482 Register TargetReg, bool FrameSetup) const {
9483 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9484
9486 MachineFunction &MF = *MBB.getParent();
9487 const AArch64InstrInfo *TII =
9488 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9489 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9490 DebugLoc DL = MBB.findDebugLoc(MBBI);
9491
9492 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9493 MachineBasicBlock *LoopTestMBB =
9494 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9495 MF.insert(MBBInsertPoint, LoopTestMBB);
9496 MachineBasicBlock *LoopBodyMBB =
9497 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9498 MF.insert(MBBInsertPoint, LoopBodyMBB);
9499 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9500 MF.insert(MBBInsertPoint, ExitMBB);
9501 MachineInstr::MIFlag Flags =
9503 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9504 // LoopTest:
9505 // SUB SP, SP, #ProbeSize
9506 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9507 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9508
9509 // CMP SP, TargetReg
9510 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9511 AArch64::XZR)
9512 .addReg(AArch64::SP)
9513 .addReg(TargetReg)
9514 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9515 .setMIFlags(Flags);
9516
9517 // B.<Cond> LoopExit
9518 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9519 .addImm(AArch64CC::LE)
9520 .addMBB(ExitMBB)
9521 .setMIFlags(Flags);
9522
9523 // STR XZR, [SP]
9524 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9525 .addReg(AArch64::XZR)
9526 .addReg(AArch64::SP)
9527 .addImm(0)
9528 .setMIFlags(Flags);
9529
9530 // B loop
9531 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9532 .addMBB(LoopTestMBB)
9533 .setMIFlags(Flags);
9534
9535 // LoopExit:
9536 // MOV SP, TargetReg
9537 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9538 .addReg(TargetReg)
9539 .addImm(0)
9540 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9541 .setMIFlags(Flags);
9542
9543 // LDR XZR, [SP]
9544 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9545 .addReg(AArch64::XZR, RegState::Define)
9546 .addReg(AArch64::SP)
9547 .addImm(0)
9548 .setMIFlags(Flags);
9549
9550 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9551 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9552
9553 LoopTestMBB->addSuccessor(ExitMBB);
9554 LoopTestMBB->addSuccessor(LoopBodyMBB);
9555 LoopBodyMBB->addSuccessor(LoopTestMBB);
9556 MBB.addSuccessor(LoopTestMBB);
9557
9558 // Update liveins.
9559 if (MF.getRegInfo().reservedRegsFrozen())
9560 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
9561
9562 return ExitMBB->begin();
9563}
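
// --- Editorial sketch, not part of AArch64InstrInfo.cpp ---
// The control flow probedStackAlloc stitches together out of LoopTestMBB,
// LoopBodyMBB and ExitMBB, written as the equivalent assembly:
static const char *ProbedStackAllocShape = R"(
  LoopTest:                     // LoopTestMBB
    sub   sp, sp, #ProbeSize
    cmp   sp, <TargetReg>       // SUBSXrx64 xzr, sp, TargetReg
    b.le  LoopExit
  LoopBody:                     // LoopBodyMBB, fallthrough from LoopTest
    str   xzr, [sp]             // probe the freshly allocated region
    b     LoopTest
  LoopExit:                     // ExitMBB
    mov   sp, <TargetReg>       // ADDXri sp, TargetReg, #0
    ldr   xzr, [sp]             // probe the final stack pointer
)";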
9564
9565namespace {
9566class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9567 MachineInstr *PredBranch;
9568 SmallVector<MachineOperand, 4> Cond;
9569
9570public:
9571 AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
9572 const SmallVectorImpl<MachineOperand> &Cond)
9573 : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
9574
9575 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9576 // Make the instructions for loop control be placed in stage 0.
9577 // The predecessors of PredBranch are considered by the caller.
9578 return MI == PredBranch;
9579 }
9580
9581 std::optional<bool> createTripCountGreaterCondition(
9582 int TC, MachineBasicBlock &MBB,
9583 SmallVectorImpl<MachineOperand> &CondParam) override {
9584 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9585 // Cond is normalized for such use.
9586 // The predecessors of the branch are assumed to have already been inserted.
9587 CondParam = Cond;
9588 return {};
9589 }
9590
9591 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9592
9593 void adjustTripCount(int TripCountAdjust) override {}
9594
9595 void disposed() override {}
9596};
9597} // namespace
9598
9599static bool isCompareAndBranch(unsigned Opcode) {
9600 switch (Opcode) {
9601 case AArch64::CBZW:
9602 case AArch64::CBZX:
9603 case AArch64::CBNZW:
9604 case AArch64::CBNZX:
9605 case AArch64::TBZW:
9606 case AArch64::TBZX:
9607 case AArch64::TBNZW:
9608 case AArch64::TBNZX:
9609 return true;
9610 }
9611 return false;
9612}
9613
9614std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9615 AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
9616 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9617 SmallVector<MachineOperand, 4> Cond;
9618 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9619 return nullptr;
9620
9621 // Infinite loops are not supported
9622 if (TBB == LoopBB && FBB == LoopBB)
9623 return nullptr;
9624
9625 // Must be conditional branch
9626 if (FBB == nullptr)
9627 return nullptr;
9628
9629 assert((TBB == LoopBB || FBB == LoopBB) &&
9630 "The Loop must be a single-basic-block loop");
9631
9632 // Normalization for createTripCountGreaterCondition()
9633 if (TBB == LoopBB)
9634 reverseBranchCondition(Cond);
9635
9636 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9637 const TargetRegisterInfo &TRI = getRegisterInfo();
9638
9639 // Find the immediate predecessor of the conditional branch
9640 MachineInstr *PredBranch = nullptr;
9641 if (CondBranch->getOpcode() == AArch64::Bcc) {
9642 for (MachineInstr &MI : reverse(*LoopBB)) {
9643 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9644 PredBranch = &MI;
9645 break;
9646 }
9647 }
9648 if (!PredBranch)
9649 return nullptr;
9650 } else if (isCompareAndBranch(CondBranch->getOpcode())) {
9651 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9652 Register Reg = CondBranch->getOperand(0).getReg();
9653 if (!Reg.isVirtual())
9654 return nullptr;
9655 PredBranch = MRI.getVRegDef(Reg);
9656
9657 // MachinePipeliner does not expect that the immediate predecessor is a Phi
9658 if (PredBranch->isPHI())
9659 return nullptr;
9660
9661 if (PredBranch->getParent() != LoopBB)
9662 return nullptr;
9663 } else {
9664 return nullptr;
9665 }
9666
9667 return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
9668}
9669
9670#define GET_INSTRINFO_HELPERS
9671#define GET_INSTRMAP_INFO
9672#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCompareAndBranch(unsigned Opcode)
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruciton used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
static bool isFpOrNEON(const MachineInstr &MI)
Returns whether the instruction is FP or NEON.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if it's results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64RegisterInfo * getRegisterInfo() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:681
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
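As a sketch of how the builder methods above are chained; the registers, immediate, and insertion point are assumptions supplied by the caller, and the helper name is hypothetical. This mirrors the common pattern for materializing an ADDXri:
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
// Assumed: MBB, MBBI, DL, TII, DestReg, SrcReg and Imm come from the caller.
static void emitAddImmediate(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, const TargetInstrInfo *TII,
                             Register DestReg, Register SrcReg, unsigned Imm) {
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
      .addReg(SrcReg)
      .addImm(Imm)                                           // 12-bit immediate
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) // no extra shift
      .setMIFlag(MachineInstr::FrameSetup);
}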
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:379
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:759
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:475
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:374
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated with IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
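A short sketch of the read/modify pattern these MachineOperand accessors support; the operand index and the helper name are assumptions for illustration:
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;
// Assumed: the caller picked OpIdx and knows MI has at least OpIdx+1 operands.
static void touchOperand(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  if (MO.isReg()) {
    MO.setIsKill(false);            // drop the kill flag before reusing the register
  } else if (MO.isImm()) {
    MO.setImm(MO.getImm() + 1);     // rewrite the immediate in place
  }
}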
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
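A minimal sketch of creating and constraining a virtual register through MachineRegisterInfo; the register classes chosen and the helper name are illustrative assumptions:
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
// Assumed: MRI comes from MF.getRegInfo(); the AArch64 register classes are
// available via the target's generated headers.
static Register makeScratchVReg(MachineRegisterInfo &MRI) {
  Register Tmp = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  // Constrain further if a use needs an SP-capable register; a null result
  // means no common subclass exists.
  if (!MRI.constrainRegClass(Tmp, &AArch64::GPR64spRegClass))
    report_fatal_error("cannot constrain scratch register class");
  return Tmp;
}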
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if the specified register is currently in use.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
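A sketch of the usual scavenging sequence built from the calls above; the register class is an illustrative choice and the helper name is hypothetical:
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
// Assumed: RS was constructed for the current function by the caller.
static Register scavengeGPR(RegScavenger &RS, MachineBasicBlock &MBB) {
  RS.enterBasicBlockEnd(MBB);                 // track liveness back from the block end
  Register Scratch = RS.FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scratch.isValid())
    RS.setRegUsed(Scratch);                   // reserve it for code emitted next
  return Scratch;                             // invalid if nothing was free
}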
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
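For orientation, a tiny sketch of classifying a Register with the predicates above (the function name is hypothetical):
#include "llvm/CodeGen/Register.h"
using namespace llvm;
static void classify(Register R) {
  if (R.isVirtual()) {
    // Before register allocation: a numbered virtual register.
  } else if (R.isPhysical()) {
    // A concrete machine register such as AArch64::X16.
  }
}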
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
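A sketch of building and splitting a StackOffset, as happens when SVE objects add a scalable component; the values and helper name are illustrative:
#include "llvm/Support/TypeSize.h"
using namespace llvm;
static void splitOffset() {
  // 32 fixed bytes plus 2 * vscale scalable bytes.
  StackOffset Off = StackOffset::get(/*Fixed=*/32, /*Scalable=*/2);
  int64_t FixedBytes = Off.getFixed();        // 32
  int64_t ScalableBytes = Off.getScalable();  // 2, multiplied by vscale at run time
  (void)FixedBytes; (void)ScalableBytes;
}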
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:333
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
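A sketch of round-tripping a logical immediate and building a shifter operand with the AArch64_AM helpers above; the concrete values and the function name are illustrative:
#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cstdint>
static void encodeExamples() {
  // 0xFF (a run of eight ones) is representable as a 64-bit logical immediate.
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0xFFULL, 64);
  uint64_t RoundTrip = AArch64_AM::decodeLogicalImmediate(Enc, 64); // 0xFF again
  // Shifter operand for "LSL #12", as used by the ADD/SUB immediate forms.
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  (void)RoundTrip; (void)Shifter;
}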
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
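A small sketch of the common pairing of these two helpers: turning a power-of-two access size into the shift amount used by scaled addressing (the helper name is hypothetical):
#include "llvm/Support/MathExtras.h"
#include <cstdint>
static unsigned scaleToShift(uint64_t Scale) {
  // e.g. Scale == 16 yields 4; non-powers of two are left to the caller to reject.
  return llvm::isPowerOf2_64(Scale) ? llvm::Log2_64(Scale) : 0;
}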
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
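A sketch of the common call pattern for emitFrameOffset, here pre-decrementing SP during frame setup; the surrounding objects and the helper name are assumptions:
#include "AArch64InstrInfo.h"
using namespace llvm;
// Assumed: MBB, MBBI, DL and TII are provided by the enclosing frame-lowering code.
static void allocateSixteenBytes(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL,
                                 const TargetInstrInfo *TII) {
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup);
}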
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:233
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
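A sketch of streaming LEB128-encoded operands into a small buffer, in the style used when assembling DWARF expression bytes for CFI escapes; the values and helper name are illustrative:
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
static llvm::SmallString<16> buildLEBPayload() {
  llvm::SmallString<16> Expr;
  llvm::raw_svector_ostream OS(Expr);
  llvm::encodeULEB128(/*Value=*/16, OS);  // unsigned operand, e.g. a register number
  llvm::encodeSLEB128(/*Value=*/-8, OS);  // signed operand, e.g. an offset
  return Expr;
}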
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:214
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.