1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
72static cl::opt<unsigned> TBZDisplacementBits(
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
76static cl::opt<unsigned> CBZDisplacementBits(
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// getInstSizeInBytes - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // The size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
136 // The specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
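// Illustrative summary of the rules above (not exhaustive): a plain data
// processing instruction such as ADDXri reports its fixed 4-byte size from the
// .td description, a STACKMAP reports exactly the shadow size requested by the
// intrinsic (e.g. 16 patch bytes -> 16), and a STATEPOINT with zero patch
// bytes still reports 4 because a regular call instruction is emitted.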
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
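// For reference, the Cond encodings produced above are:
//   Bcc              -> { CC }
//   CB(N)Z[WX]       -> { -1, Opcode, Reg }
//   TB(N)Z[WX]       -> { -1, Opcode, Reg, BitNum }
//   CB[WX]P{ri,rr}   -> { -1, Opcode, CC, Op0, Op1 }
// The leading -1 marks a folded compare-and-branch so later code can tell it
// apart from a plain Bcc condition code.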
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump"
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
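// For illustration: the offset is encoded in 4-byte units, so with the default
// widths above a Bcc or CB[N]Z (19 bits) reaches roughly +/-1MiB, a TB[N]Z
// (14 bits) about +/-32KiB, and an unconditional B (26 bits) about +/-128MiB.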
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
324 report_fatal_error(
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
360 report_fatal_error(
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
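// The manually expanded long branch above materializes the destination with
// ADRP+ADD and jumps through the scavenged register, i.e. roughly:
//   adrp xN, DestBB
//   add  xN, xN, :lo12:DestBB
//   br   xN
// When no register is free, X16 is spilled around the branch instead and the
// linker-inserted range extension thunk is relied upon.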
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381 MachineBasicBlock *&TBB,
382 MachineBasicBlock *&FBB,
383 SmallVectorImpl<MachineOperand> &Cond,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now that the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
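// Example of the contract above (block and register names are illustrative):
// a block ending in
//   cbz w0, %bb.2
//   b %bb.3
// is reported with TBB = %bb.2, FBB = %bb.3 and Cond = { -1, CBZW, w0 },
// matching the encoding produced by parseCondBranch().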
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4-bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewVReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcOpNum = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::ADDSXri:
714 case AArch64::ADDSWri:
715 // if NZCV is used, do not fold.
716 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
717 true) == -1)
718 return 0;
719 // fall-through to ADDXri and ADDWri.
720 [[fallthrough]];
721 case AArch64::ADDXri:
722 case AArch64::ADDWri:
723 // add x, 1 -> csinc.
724 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
725 DefMI->getOperand(3).getImm() != 0)
726 return 0;
727 SrcOpNum = 1;
728 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729 break;
730
731 case AArch64::ORNXrr:
732 case AArch64::ORNWrr: {
733 // not x -> csinv, represented as orn dst, xzr, src.
734 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
735 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736 return 0;
737 SrcOpNum = 2;
738 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739 break;
740 }
741
742 case AArch64::SUBSXrr:
743 case AArch64::SUBSWrr:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to SUBXrr and SUBWrr.
749 [[fallthrough]];
750 case AArch64::SUBXrr:
751 case AArch64::SUBWrr: {
752 // neg x -> csneg, represented as sub dst, xzr, src.
753 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
754 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755 return 0;
756 SrcOpNum = 2;
757 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758 break;
759 }
760 default:
761 return 0;
762 }
763 assert(Opc && SrcOpNum && "Missing parameters");
764
765 if (NewVReg)
766 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
767 return Opc;
768}
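// For illustration, the folds recognized above are:
//   %v = ADDWri %x, 1, 0   (x + 1)  -> CSINC, replacement register %x
//   %v = ORNWrr wzr, %x    (~x)     -> CSINV, replacement register %x
//   %v = SUBWrr wzr, %x    (-x)     -> CSNEG, replacement register %x
// The flag-setting forms are only folded when their NZCV def is dead.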
769
770bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
771 ArrayRef<MachineOperand> Cond,
772 Register DstReg, Register TrueReg,
773 Register FalseReg, int &CondCycles,
774 int &TrueCycles,
775 int &FalseCycles) const {
776 // Check register classes.
777 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
778 const TargetRegisterClass *RC =
779 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
780 if (!RC)
781 return false;
782
783 // Also need to check the dest regclass, in case we're trying to optimize
784 // something like:
785 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
786 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
787 return false;
788
789 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
790 unsigned ExtraCondLat = Cond.size() != 1;
791
792 // GPRs are handled by csel.
793 // FIXME: Fold in x+1, -x, and ~x when applicable.
794 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
795 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
796 // Single-cycle csel, csinc, csinv, and csneg.
797 CondCycles = 1 + ExtraCondLat;
798 TrueCycles = FalseCycles = 1;
799 if (canFoldIntoCSel(MRI, TrueReg))
800 TrueCycles = 0;
801 else if (canFoldIntoCSel(MRI, FalseReg))
802 FalseCycles = 0;
803 return true;
804 }
805
806 // Scalar floating point is handled by fcsel.
807 // FIXME: Form fabs, fmin, and fmax when applicable.
808 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
809 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
810 CondCycles = 5 + ExtraCondLat;
811 TrueCycles = FalseCycles = 2;
812 return true;
813 }
814
815 // Can't do vectors.
816 return false;
817}
818
819void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
820 MachineBasicBlock::iterator I,
821 const DebugLoc &DL, Register DstReg,
822 ArrayRef<MachineOperand> Cond,
823 Register TrueReg, Register FalseReg) const {
824 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
825
826 // Parse the condition code, see parseCondBranch() above.
827 AArch64CC::CondCode CC;
828 switch (Cond.size()) {
829 default:
830 llvm_unreachable("Unknown condition opcode in Cond");
831 case 1: // b.cc
832 CC = AArch64CC::CondCode(Cond[0].getImm());
833 break;
834 case 3: { // cbz/cbnz
835 // We must insert a compare against 0.
836 bool Is64Bit;
837 switch (Cond[1].getImm()) {
838 default:
839 llvm_unreachable("Unknown branch opcode in Cond");
840 case AArch64::CBZW:
841 Is64Bit = false;
842 CC = AArch64CC::EQ;
843 break;
844 case AArch64::CBZX:
845 Is64Bit = true;
846 CC = AArch64CC::EQ;
847 break;
848 case AArch64::CBNZW:
849 Is64Bit = false;
850 CC = AArch64CC::NE;
851 break;
852 case AArch64::CBNZX:
853 Is64Bit = true;
854 CC = AArch64CC::NE;
855 break;
856 }
857 Register SrcReg = Cond[2].getReg();
858 if (Is64Bit) {
859 // cmp reg, #0 is actually subs xzr, reg, #0.
860 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
861 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
862 .addReg(SrcReg)
863 .addImm(0)
864 .addImm(0);
865 } else {
866 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
867 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
868 .addReg(SrcReg)
869 .addImm(0)
870 .addImm(0);
871 }
872 break;
873 }
874 case 4: { // tbz/tbnz
875 // We must insert a tst instruction.
876 switch (Cond[1].getImm()) {
877 default:
878 llvm_unreachable("Unknown branch opcode in Cond");
879 case AArch64::TBZW:
880 case AArch64::TBZX:
881 CC = AArch64CC::EQ;
882 break;
883 case AArch64::TBNZW:
884 case AArch64::TBNZX:
885 CC = AArch64CC::NE;
886 break;
887 }
888 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
889 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
890 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
891 .addReg(Cond[2].getReg())
892 .addImm(
893 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
894 else
895 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
896 .addReg(Cond[2].getReg())
897 .addImm(
898 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
899 break;
900 }
901 case 5: { // cb
902 // We must insert a cmp, that is a subs
903 // 0 1 2 3 4
904 // Cond is { -1, Opcode, CC, Op0, Op1 }
905 unsigned SUBSOpC, SUBSDestReg;
906 bool IsImm = false;
907 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
908 switch (Cond[1].getImm()) {
909 default:
910 llvm_unreachable("Unknown branch opcode in Cond");
911 case AArch64::CBWPri:
912 SUBSOpC = AArch64::SUBSWri;
913 SUBSDestReg = AArch64::WZR;
914 IsImm = true;
915 break;
916 case AArch64::CBXPri:
917 SUBSOpC = AArch64::SUBSXri;
918 SUBSDestReg = AArch64::XZR;
919 IsImm = true;
920 break;
921 case AArch64::CBWPrr:
922 SUBSOpC = AArch64::SUBSWrr;
923 SUBSDestReg = AArch64::WZR;
924 IsImm = false;
925 break;
926 case AArch64::CBXPrr:
927 SUBSOpC = AArch64::SUBSXrr;
928 SUBSDestReg = AArch64::XZR;
929 IsImm = false;
930 break;
931 }
932
933 if (IsImm)
934 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
935 .addReg(Cond[3].getReg())
936 .addImm(Cond[4].getImm())
937 .addImm(0);
938 else
939 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
940 .addReg(Cond[3].getReg())
941 .addReg(Cond[4].getReg());
942 }
943 }
944
945 unsigned Opc = 0;
946 const TargetRegisterClass *RC = nullptr;
947 bool TryFold = false;
948 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
949 RC = &AArch64::GPR64RegClass;
950 Opc = AArch64::CSELXr;
951 TryFold = true;
952 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
953 RC = &AArch64::GPR32RegClass;
954 Opc = AArch64::CSELWr;
955 TryFold = true;
956 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
957 RC = &AArch64::FPR64RegClass;
958 Opc = AArch64::FCSELDrrr;
959 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
960 RC = &AArch64::FPR32RegClass;
961 Opc = AArch64::FCSELSrrr;
962 }
963 assert(RC && "Unsupported regclass");
964
965 // Try folding simple instructions into the csel.
966 if (TryFold) {
967 unsigned NewVReg = 0;
968 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
969 if (FoldedOpc) {
970 // The folded opcodes csinc, csinv and csneg apply the operation to
971 // FalseReg, so we need to invert the condition.
972 CC = AArch64CC::getInvertedCondCode(CC);
973 TrueReg = FalseReg;
974 } else
975 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
976
977 // Fold the operation. Leave any dead instructions for DCE to clean up.
978 if (FoldedOpc) {
979 FalseReg = NewVReg;
980 Opc = FoldedOpc;
981 // This extends the live range of NewVReg.
982 MRI.clearKillFlags(NewVReg);
983 }
984 }
985
986 // Pull all virtual registers into the appropriate class.
987 MRI.constrainRegClass(TrueReg, RC);
988 MRI.constrainRegClass(FalseReg, RC);
989
990 // Insert the csel.
991 BuildMI(MBB, I, DL, get(Opc), DstReg)
992 .addReg(TrueReg)
993 .addReg(FalseReg)
994 .addImm(CC);
995}
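// Worked example of the folding path above (register names are illustrative):
//   %t = ADDWri %x, 1, 0
//   %d = select EQ, %t, %f
// canFoldIntoCSel returns CSINCWr with %x as the replacement, the condition is
// inverted to NE, and the emitted instruction is
//   %d = CSINCWr %f, %x, NE      ; i.e. EQ ? %x + 1 : %f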
996
997// Return true if Imm can be loaded into a register by a "cheap" sequence of
998// instructions. For now, "cheap" means at most two instructions.
999static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1000 if (BitSize == 32)
1001 return true;
1002
1003 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1004 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1005 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1006 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1007
1008 return Is.size() <= 2;
1009}
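// For example, MOVi64imm 0x12345678 expands to MOVZ+MOVK (two instructions,
// cheap), whereas a value with four distinct non-trivial 16-bit chunks such as
// 0x123456789ABCDEF0 needs a MOVZ plus three MOVKs and is not considered
// cheap.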
1010
1011// FIXME: this implementation should be micro-architecture dependent, so a
1012// micro-architecture target hook should be introduced here in future.
1013bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1014 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1015 if (isExynosCheapAsMove(MI))
1016 return true;
1017 return MI.isAsCheapAsAMove();
1018 }
1019
1020 switch (MI.getOpcode()) {
1021 default:
1022 return MI.isAsCheapAsAMove();
1023
1024 case AArch64::ADDWrs:
1025 case AArch64::ADDXrs:
1026 case AArch64::SUBWrs:
1027 case AArch64::SUBXrs:
1028 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1029
1030 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1031 // ORRXri, it is as cheap as MOV.
1032 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1033 case AArch64::MOVi32imm:
1034 return isCheapImmediate(MI, 32);
1035 case AArch64::MOVi64imm:
1036 return isCheapImmediate(MI, 64);
1037 }
1038}
1039
1040bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1041 switch (MI.getOpcode()) {
1042 default:
1043 return false;
1044
1045 case AArch64::ADDWrs:
1046 case AArch64::ADDXrs:
1047 case AArch64::ADDSWrs:
1048 case AArch64::ADDSXrs: {
1049 unsigned Imm = MI.getOperand(3).getImm();
1050 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1051 if (ShiftVal == 0)
1052 return true;
1053 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1054 }
1055
1056 case AArch64::ADDWrx:
1057 case AArch64::ADDXrx:
1058 case AArch64::ADDXrx64:
1059 case AArch64::ADDSWrx:
1060 case AArch64::ADDSXrx:
1061 case AArch64::ADDSXrx64: {
1062 unsigned Imm = MI.getOperand(3).getImm();
1063 switch (AArch64_AM::getArithExtendType(Imm)) {
1064 default:
1065 return false;
1066 case AArch64_AM::UXTB:
1067 case AArch64_AM::UXTH:
1068 case AArch64_AM::UXTW:
1069 case AArch64_AM::UXTX:
1070 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1071 }
1072 }
1073
1074 case AArch64::SUBWrs:
1075 case AArch64::SUBSWrs: {
1076 unsigned Imm = MI.getOperand(3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1080 }
1081
1082 case AArch64::SUBXrs:
1083 case AArch64::SUBSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 return ShiftVal == 0 ||
1087 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1088 }
1089
1090 case AArch64::SUBWrx:
1091 case AArch64::SUBXrx:
1092 case AArch64::SUBXrx64:
1093 case AArch64::SUBSWrx:
1094 case AArch64::SUBSXrx:
1095 case AArch64::SUBSXrx64: {
1096 unsigned Imm = MI.getOperand(3).getImm();
1097 switch (AArch64_AM::getArithExtendType(Imm)) {
1098 default:
1099 return false;
1100 case AArch64_AM::UXTB:
1101 case AArch64_AM::UXTH:
1102 case AArch64_AM::UXTW:
1103 case AArch64_AM::UXTX:
1104 return AArch64_AM::getArithShiftValue(Imm) == 0;
1105 }
1106 }
1107
1108 case AArch64::LDRBBroW:
1109 case AArch64::LDRBBroX:
1110 case AArch64::LDRBroW:
1111 case AArch64::LDRBroX:
1112 case AArch64::LDRDroW:
1113 case AArch64::LDRDroX:
1114 case AArch64::LDRHHroW:
1115 case AArch64::LDRHHroX:
1116 case AArch64::LDRHroW:
1117 case AArch64::LDRHroX:
1118 case AArch64::LDRQroW:
1119 case AArch64::LDRQroX:
1120 case AArch64::LDRSBWroW:
1121 case AArch64::LDRSBWroX:
1122 case AArch64::LDRSBXroW:
1123 case AArch64::LDRSBXroX:
1124 case AArch64::LDRSHWroW:
1125 case AArch64::LDRSHWroX:
1126 case AArch64::LDRSHXroW:
1127 case AArch64::LDRSHXroX:
1128 case AArch64::LDRSWroW:
1129 case AArch64::LDRSWroX:
1130 case AArch64::LDRSroW:
1131 case AArch64::LDRSroX:
1132 case AArch64::LDRWroW:
1133 case AArch64::LDRWroX:
1134 case AArch64::LDRXroW:
1135 case AArch64::LDRXroX:
1136 case AArch64::PRFMroW:
1137 case AArch64::PRFMroX:
1138 case AArch64::STRBBroW:
1139 case AArch64::STRBBroX:
1140 case AArch64::STRBroW:
1141 case AArch64::STRBroX:
1142 case AArch64::STRDroW:
1143 case AArch64::STRDroX:
1144 case AArch64::STRHHroW:
1145 case AArch64::STRHHroX:
1146 case AArch64::STRHroW:
1147 case AArch64::STRHroX:
1148 case AArch64::STRQroW:
1149 case AArch64::STRQroX:
1150 case AArch64::STRSroW:
1151 case AArch64::STRSroX:
1152 case AArch64::STRWroW:
1153 case AArch64::STRWroX:
1154 case AArch64::STRXroW:
1155 case AArch64::STRXroX: {
1156 unsigned IsSigned = MI.getOperand(3).getImm();
1157 return !IsSigned;
1158 }
1159 }
1160}
1161
1162bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1163 unsigned Opc = MI.getOpcode();
1164 switch (Opc) {
1165 default:
1166 return false;
1167 case AArch64::SEH_StackAlloc:
1168 case AArch64::SEH_SaveFPLR:
1169 case AArch64::SEH_SaveFPLR_X:
1170 case AArch64::SEH_SaveReg:
1171 case AArch64::SEH_SaveReg_X:
1172 case AArch64::SEH_SaveRegP:
1173 case AArch64::SEH_SaveRegP_X:
1174 case AArch64::SEH_SaveFReg:
1175 case AArch64::SEH_SaveFReg_X:
1176 case AArch64::SEH_SaveFRegP:
1177 case AArch64::SEH_SaveFRegP_X:
1178 case AArch64::SEH_SetFP:
1179 case AArch64::SEH_AddFP:
1180 case AArch64::SEH_Nop:
1181 case AArch64::SEH_PrologEnd:
1182 case AArch64::SEH_EpilogStart:
1183 case AArch64::SEH_EpilogEnd:
1184 case AArch64::SEH_PACSignLR:
1185 case AArch64::SEH_SaveAnyRegQP:
1186 case AArch64::SEH_SaveAnyRegQPX:
1187 case AArch64::SEH_AllocZ:
1188 case AArch64::SEH_SaveZReg:
1189 case AArch64::SEH_SavePReg:
1190 return true;
1191 }
1192}
1193
1194bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1195 Register &SrcReg, Register &DstReg,
1196 unsigned &SubIdx) const {
1197 switch (MI.getOpcode()) {
1198 default:
1199 return false;
1200 case AArch64::SBFMXri: // aka sxtw
1201 case AArch64::UBFMXri: // aka uxtw
1202 // Check for the 32 -> 64 bit extension case, these instructions can do
1203 // much more.
1204 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1205 return false;
1206 // This is a signed or unsigned 32 -> 64 bit extension.
1207 SrcReg = MI.getOperand(1).getReg();
1208 DstReg = MI.getOperand(0).getReg();
1209 SubIdx = AArch64::sub_32;
1210 return true;
1211 }
1212}
1213
1214bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1215 const MachineInstr &MIa, const MachineInstr &MIb) const {
1216 const TargetRegisterInfo *TRI = &getRegisterInfo();
1217 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1218 int64_t OffsetA = 0, OffsetB = 0;
1219 TypeSize WidthA(0, false), WidthB(0, false);
1220 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1221
1222 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1223 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1224
1225 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1226 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1227 return false;
1228
1229 // Retrieve the base, offset from the base and width. Width
1230 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1231 // the bases are identical, and the offset of a lower memory access +
1232 // the width doesn't overlap the offset of a higher memory access,
1233 // then the memory accesses are different.
1234 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1235 // are assumed to have the same scale (vscale).
1236 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1237 WidthA, TRI) &&
1238 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1239 WidthB, TRI)) {
1240 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1241 OffsetAIsScalable == OffsetBIsScalable) {
1242 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1243 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1244 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1245 if (LowWidth.isScalable() == OffsetAIsScalable &&
1246 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1247 return true;
1248 }
1249 }
1250 return false;
1251}
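// For instance, two 8-byte stores to [x0] and [x0, #8] share a base and do not
// overlap (0 + 8 <= 8), so they are reported disjoint; accesses off different
// base registers are conservatively assumed to alias.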
1252
1253bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1254 const MachineBasicBlock *MBB,
1255 const MachineFunction &MF) const {
1256 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1257 return true;
1258
1259 // Do not move an instruction that can be recognized as a branch target.
1260 if (hasBTISemantics(MI))
1261 return true;
1262
1263 switch (MI.getOpcode()) {
1264 case AArch64::HINT:
1265 // CSDB hints are scheduling barriers.
1266 if (MI.getOperand(0).getImm() == 0x14)
1267 return true;
1268 break;
1269 case AArch64::DSB:
1270 case AArch64::ISB:
1271 // DSB and ISB also are scheduling barriers.
1272 return true;
1273 case AArch64::MSRpstatesvcrImm1:
1274 // SMSTART and SMSTOP are also scheduling barriers.
1275 return true;
1276 default:;
1277 }
1278 if (isSEHInstruction(MI))
1279 return true;
1280 auto Next = std::next(MI.getIterator());
1281 return Next != MBB->end() && Next->isCFIInstruction();
1282}
1283
1284/// analyzeCompare - For a comparison instruction, return the source registers
1285/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1286/// Return true if the comparison instruction can be analyzed.
1287bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1288 Register &SrcReg2, int64_t &CmpMask,
1289 int64_t &CmpValue) const {
1290 // The first operand can be a frame index where we'd normally expect a
1291 // register.
1292 // FIXME: Pass subregisters out of analyzeCompare
1293 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1294 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1295 return false;
1296
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case AArch64::PTEST_PP:
1301 case AArch64::PTEST_PP_ANY:
1302 case AArch64::PTEST_PP_FIRST:
1303 SrcReg = MI.getOperand(0).getReg();
1304 SrcReg2 = MI.getOperand(1).getReg();
1305 if (MI.getOperand(2).getSubReg())
1306 return false;
1307
1308 // Not sure about the mask and value for now...
1309 CmpMask = ~0;
1310 CmpValue = 0;
1311 return true;
1312 case AArch64::SUBSWrr:
1313 case AArch64::SUBSWrs:
1314 case AArch64::SUBSWrx:
1315 case AArch64::SUBSXrr:
1316 case AArch64::SUBSXrs:
1317 case AArch64::SUBSXrx:
1318 case AArch64::ADDSWrr:
1319 case AArch64::ADDSWrs:
1320 case AArch64::ADDSWrx:
1321 case AArch64::ADDSXrr:
1322 case AArch64::ADDSXrs:
1323 case AArch64::ADDSXrx:
1324 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1325 SrcReg = MI.getOperand(1).getReg();
1326 SrcReg2 = MI.getOperand(2).getReg();
1327
1328 // FIXME: Pass subregisters out of analyzeCompare
1329 if (MI.getOperand(2).getSubReg())
1330 return false;
1331
1332 CmpMask = ~0;
1333 CmpValue = 0;
1334 return true;
1335 case AArch64::SUBSWri:
1336 case AArch64::ADDSWri:
1337 case AArch64::SUBSXri:
1338 case AArch64::ADDSXri:
1339 SrcReg = MI.getOperand(1).getReg();
1340 SrcReg2 = 0;
1341 CmpMask = ~0;
1342 CmpValue = MI.getOperand(2).getImm();
1343 return true;
1344 case AArch64::ANDSWri:
1345 case AArch64::ANDSXri:
1346 // ANDS does not use the same encoding scheme as the other xxxS
1347 // instructions.
1348 SrcReg = MI.getOperand(1).getReg();
1349 SrcReg2 = 0;
1350 CmpMask = ~0;
1351 CmpValue = AArch64_AM::decodeLogicalImmediate(
1352 MI.getOperand(2).getImm(),
1353 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1354 return true;
1355 }
1356
1357 return false;
1358}
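// Examples of the decomposition above: SUBSWri %w1, 42, 0 yields SrcReg = %w1,
// SrcReg2 = 0, CmpValue = 42; SUBSWrr %w1, %w2 yields SrcReg = %w1,
// SrcReg2 = %w2, CmpValue = 0; ANDSWri reports the decoded logical immediate
// (the mask itself) as CmpValue.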
1359
1360static bool UpdateOperandRegClass(MachineInstr &Instr) {
1361 MachineBasicBlock *MBB = Instr.getParent();
1362 assert(MBB && "Can't get MachineBasicBlock here");
1363 MachineFunction *MF = MBB->getParent();
1364 assert(MF && "Can't get MachineFunction here");
1365 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1366 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1367 MachineRegisterInfo *MRI = &MF->getRegInfo();
1368
1369 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1370 ++OpIdx) {
1371 MachineOperand &MO = Instr.getOperand(OpIdx);
1372 const TargetRegisterClass *OpRegCstraints =
1373 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1374
1375 // If there's no constraint, there's nothing to do.
1376 if (!OpRegCstraints)
1377 continue;
1378 // If the operand is a frame index, there's nothing to do here.
1379 // A frame index operand will resolve correctly during PEI.
1380 if (MO.isFI())
1381 continue;
1382
1383 assert(MO.isReg() &&
1384 "Operand has register constraints without being a register!");
1385
1386 Register Reg = MO.getReg();
1387 if (Reg.isPhysical()) {
1388 if (!OpRegCstraints->contains(Reg))
1389 return false;
1390 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1391 !MRI->constrainRegClass(Reg, OpRegCstraints))
1392 return false;
1393 }
1394
1395 return true;
1396}
1397
1398/// Return the opcode that does not set flags when possible - otherwise
1399/// return the original opcode. The caller is responsible to do the actual
1400/// substitution and legality checking.
1402 // Don't convert all compare instructions, because for some the zero register
1403 // encoding becomes the sp register.
1404 bool MIDefinesZeroReg = false;
1405 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1406 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1407 MIDefinesZeroReg = true;
1408
1409 switch (MI.getOpcode()) {
1410 default:
1411 return MI.getOpcode();
1412 case AArch64::ADDSWrr:
1413 return AArch64::ADDWrr;
1414 case AArch64::ADDSWri:
1415 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1416 case AArch64::ADDSWrs:
1417 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1418 case AArch64::ADDSWrx:
1419 return AArch64::ADDWrx;
1420 case AArch64::ADDSXrr:
1421 return AArch64::ADDXrr;
1422 case AArch64::ADDSXri:
1423 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1424 case AArch64::ADDSXrs:
1425 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1426 case AArch64::ADDSXrx:
1427 return AArch64::ADDXrx;
1428 case AArch64::SUBSWrr:
1429 return AArch64::SUBWrr;
1430 case AArch64::SUBSWri:
1431 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1432 case AArch64::SUBSWrs:
1433 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1434 case AArch64::SUBSWrx:
1435 return AArch64::SUBWrx;
1436 case AArch64::SUBSXrr:
1437 return AArch64::SUBXrr;
1438 case AArch64::SUBSXri:
1439 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1440 case AArch64::SUBSXrs:
1441 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1442 case AArch64::SUBSXrx:
1443 return AArch64::SUBXrx;
1444 }
1445}
1446
1447enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1448
1449/// True when condition flags are accessed (either by writing or reading)
1450/// on the instruction trace starting at From and ending at To.
1451///
1452/// Note: If From and To are from different blocks it's assumed the condition
1453/// flags are accessed on the path.
1454static bool areCFlagsAccessedBetweenInstrs(
1455 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1456 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1457 // Early exit if To is at the beginning of the BB.
1458 if (To == To->getParent()->begin())
1459 return true;
1460
1461 // Check whether the instructions are in the same basic block
1462 // If not, assume the condition flags might get modified somewhere.
1463 if (To->getParent() != From->getParent())
1464 return true;
1465
1466 // From must be above To.
1467 assert(std::any_of(
1468 ++To.getReverse(), To->getParent()->rend(),
1469 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1470
1471 // We iterate backward starting at \p To until we hit \p From.
1472 for (const MachineInstr &Instr :
1473 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1474 if (((AccessToCheck & AK_Write) &&
1475 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1476 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1477 return true;
1478 }
1479 return false;
1480}
1481
1482std::optional<unsigned>
1483AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1484 MachineInstr *Pred,
1485 const MachineRegisterInfo *MRI) const {
1486 unsigned MaskOpcode = Mask->getOpcode();
1487 unsigned PredOpcode = Pred->getOpcode();
1488 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1489 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1490
1491 if (PredIsWhileLike) {
1492 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1493 // instruction and the condition is "any" since WHILEcc does an implicit
1494 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1495 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1496 return PredOpcode;
1497
1498 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1499 // redundant since WHILE performs an implicit PTEST with an all active
1500 // mask.
1501 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1502 getElementSizeForOpcode(MaskOpcode) ==
1503 getElementSizeForOpcode(PredOpcode))
1504 return PredOpcode;
1505
1506 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1507 // WHILEcc performs an implicit PTEST with an all active mask, setting
1508 // the N flag as the PTEST_FIRST would.
1509 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1510 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1511 return PredOpcode;
1512
1513 return {};
1514 }
1515
1516 if (PredIsPTestLike) {
1517 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1518 // instruction that sets the flags as PTEST would and the condition is
1519 // "any" since PG is always a subset of the governing predicate of the
1520 // ptest-like instruction.
1521 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1522 return PredOpcode;
1523
1524 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1525
1526 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1527 // to look through a copy and try again. This is because some instructions
1528 // take a predicate whose register class is a subset of its result class.
1529 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1530 PTestLikeMask->getOperand(1).getReg().isVirtual())
1531 PTestLikeMask =
1532 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1533
1534 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1535 // element size matches and either the PTEST_LIKE instruction uses
1536 // the same all active mask or the condition is "any".
1537 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1538 getElementSizeForOpcode(MaskOpcode) ==
1539 getElementSizeForOpcode(PredOpcode)) {
1540 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1541 return PredOpcode;
1542 }
1543
1544 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1545 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1546 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1547 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1548 // performed by the compare could consider fewer lanes for these element
1549 // sizes.
1550 //
1551 // For example, consider
1552 //
1553 // ptrue p0.b ; P0=1111-1111-1111-1111
1554 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1555 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1556 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1557 // ; ^ last active
1558 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1559 // ; ^ last active
1560 //
1561 // where the compare generates a canonical all active 32-bit predicate
1562 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1563 // active flag, whereas the PTEST instruction with the same mask doesn't.
1564 // For PTEST_ANY this doesn't apply as the flags in this case would be
1565 // identical regardless of element size.
1566 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1567 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1568 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1569 return PredOpcode;
1570
1571 return {};
1572 }
1573
1574 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1575 // opcode so the PTEST becomes redundant.
1576 switch (PredOpcode) {
1577 case AArch64::AND_PPzPP:
1578 case AArch64::BIC_PPzPP:
1579 case AArch64::EOR_PPzPP:
1580 case AArch64::NAND_PPzPP:
1581 case AArch64::NOR_PPzPP:
1582 case AArch64::ORN_PPzPP:
1583 case AArch64::ORR_PPzPP:
1584 case AArch64::BRKA_PPzP:
1585 case AArch64::BRKPA_PPzPP:
1586 case AArch64::BRKB_PPzP:
1587 case AArch64::BRKPB_PPzPP:
1588 case AArch64::RDFFR_PPz: {
1589 // Check to see if our mask is the same. If not the resulting flag bits
1590 // may be different and we can't remove the ptest.
1591 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1592 if (Mask != PredMask)
1593 return {};
1594 break;
1595 }
1596 case AArch64::BRKN_PPzP: {
1597 // BRKN uses an all active implicit mask to set flags unlike the other
1598 // flag-setting instructions.
1599 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1600 if ((MaskOpcode != AArch64::PTRUE_B) ||
1601 (Mask->getOperand(1).getImm() != 31))
1602 return {};
1603 break;
1604 }
1605 case AArch64::PTRUE_B:
1606 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1607 break;
1608 default:
1609 // Bail out if we don't recognize the input
1610 return {};
1611 }
1612
1613 return convertToFlagSettingOpc(PredOpcode);
1614}
1615
1616/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1617/// operation which could set the flags in an identical manner
1618bool AArch64InstrInfo::optimizePTestInstr(
1619 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1620 const MachineRegisterInfo *MRI) const {
1621 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1622 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1623
1624 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1625 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1626 // before the branch to extract each subregister.
1627 auto Op = Pred->getOperand(1);
1628 if (Op.isReg() && Op.getReg().isVirtual() &&
1629 Op.getSubReg() == AArch64::psub0)
1630 Pred = MRI->getUniqueVRegDef(Op.getReg());
1631 }
1632
1633 unsigned PredOpcode = Pred->getOpcode();
1634 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1635 if (!NewOp)
1636 return false;
1637
1638 const TargetRegisterInfo *TRI = &getRegisterInfo();
1639
1640 // If another instruction between Pred and PTest accesses flags, don't remove
1641 // the ptest or update the earlier instruction to modify them.
1642 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1643 return false;
1644
1645 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1646 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1647 // operand to be replaced with an equivalent instruction that also sets the
1648 // flags.
1649 PTest->eraseFromParent();
1650 if (*NewOp != PredOpcode) {
1651 Pred->setDesc(get(*NewOp));
1652 bool succeeded = UpdateOperandRegClass(*Pred);
1653 (void)succeeded;
1654 assert(succeeded && "Operands have incompatible register classes!");
1655 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1656 }
1657
1658 // Ensure that the flags def is live.
1659 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1660 unsigned i = 0, e = Pred->getNumOperands();
1661 for (; i != e; ++i) {
1662 MachineOperand &MO = Pred->getOperand(i);
1663 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1664 MO.setIsDead(false);
1665 break;
1666 }
1667 }
1668 }
1669 return true;
1670}
1671
1672/// Try to optimize a compare instruction. A compare instruction is an
1673/// instruction which produces AArch64::NZCV. It is treated as a true compare
1674/// instruction when there are no uses of its destination register.
1676///
1677/// The following steps are tried in order:
1678/// 1. Convert CmpInstr into an unconditional version.
1679/// 2. Remove CmpInstr if above there is an instruction producing a needed
1680/// condition code or an instruction which can be converted into such an
1681/// instruction.
1682/// Only comparison with zero is supported.
1683bool AArch64InstrInfo::optimizeCompareInstr(
1684 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1685 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1686 assert(CmpInstr.getParent());
1687 assert(MRI);
1688
1689 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1690 int DeadNZCVIdx =
1691 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1692 if (DeadNZCVIdx != -1) {
1693 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1694 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1695 CmpInstr.eraseFromParent();
1696 return true;
1697 }
1698 unsigned Opc = CmpInstr.getOpcode();
1699 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1700 if (NewOpc == Opc)
1701 return false;
1702 const MCInstrDesc &MCID = get(NewOpc);
1703 CmpInstr.setDesc(MCID);
1704 CmpInstr.removeOperand(DeadNZCVIdx);
1705 bool succeeded = UpdateOperandRegClass(CmpInstr);
1706 (void)succeeded;
1707 assert(succeeded && "Some operands reg class are incompatible!");
1708 return true;
1709 }
1710
1711 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1712 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1713 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1714 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1715
1716 if (SrcReg2 != 0)
1717 return false;
1718
1719 // CmpInstr is a Compare instruction if destination register is not used.
1720 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1721 return false;
1722
1723 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1724 return true;
1725 return (CmpValue == 0 || CmpValue == 1) &&
1726 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1727}
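// Typical effect of step 2 above (virtual register numbers are illustrative):
//   %3 = SUBWrr %1, %2
//   %4 = SUBSWri %3, 0, 0, implicit-def $nzcv   ; %4 unused
//   Bcc 1, %bb.1, implicit $nzcv
// The compare against zero is removed and the SUBWrr is rewritten to SUBSWrr
// (see substituteCmpToZero / removeCmpToZeroOrOne, called above), provided no
// other instruction between them touches NZCV.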
1728
1729/// Get opcode of S version of Instr.
1730/// If Instr is already the S version, its opcode is returned.
1731/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1732/// or we are not interested in it.
1733static unsigned sForm(MachineInstr &Instr) {
1734 switch (Instr.getOpcode()) {
1735 default:
1736 return AArch64::INSTRUCTION_LIST_END;
1737
1738 case AArch64::ADDSWrr:
1739 case AArch64::ADDSWri:
1740 case AArch64::ADDSXrr:
1741 case AArch64::ADDSXri:
1742 case AArch64::SUBSWrr:
1743 case AArch64::SUBSWri:
1744 case AArch64::SUBSXrr:
1745 case AArch64::SUBSXri:
1746 return Instr.getOpcode();
1747
1748 case AArch64::ADDWrr:
1749 return AArch64::ADDSWrr;
1750 case AArch64::ADDWri:
1751 return AArch64::ADDSWri;
1752 case AArch64::ADDXrr:
1753 return AArch64::ADDSXrr;
1754 case AArch64::ADDXri:
1755 return AArch64::ADDSXri;
1756 case AArch64::ADCWr:
1757 return AArch64::ADCSWr;
1758 case AArch64::ADCXr:
1759 return AArch64::ADCSXr;
1760 case AArch64::SUBWrr:
1761 return AArch64::SUBSWrr;
1762 case AArch64::SUBWri:
1763 return AArch64::SUBSWri;
1764 case AArch64::SUBXrr:
1765 return AArch64::SUBSXrr;
1766 case AArch64::SUBXri:
1767 return AArch64::SUBSXri;
1768 case AArch64::SBCWr:
1769 return AArch64::SBCSWr;
1770 case AArch64::SBCXr:
1771 return AArch64::SBCSXr;
1772 case AArch64::ANDWri:
1773 return AArch64::ANDSWri;
1774 case AArch64::ANDXri:
1775 return AArch64::ANDSXri;
1776 }
1777}
1778
1779/// Check if AArch64::NZCV should be alive in successors of MBB.
1780static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1781 for (auto *BB : MBB->successors())
1782 if (BB->isLiveIn(AArch64::NZCV))
1783 return true;
1784 return false;
1785}
1786
1787/// \returns The condition code operand index for \p Instr if it is a branch
1788/// or select and -1 otherwise.
1789static int
1790findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1791 switch (Instr.getOpcode()) {
1792 default:
1793 return -1;
1794
1795 case AArch64::Bcc: {
1796 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1797 assert(Idx >= 2);
1798 return Idx - 2;
1799 }
1800
1801 case AArch64::CSINVWr:
1802 case AArch64::CSINVXr:
1803 case AArch64::CSINCWr:
1804 case AArch64::CSINCXr:
1805 case AArch64::CSELWr:
1806 case AArch64::CSELXr:
1807 case AArch64::CSNEGWr:
1808 case AArch64::CSNEGXr:
1809 case AArch64::FCSELSrrr:
1810 case AArch64::FCSELDrrr: {
1811 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1812 assert(Idx >= 1);
1813 return Idx - 1;
1814 }
1815 }
1816}
1817
1818/// Find a condition code used by the instruction.
1819/// Returns AArch64CC::Invalid if either the instruction does not use condition
1820/// codes or we don't optimize CmpInstr in the presence of such instructions.
1821static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1822 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1823 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1824 Instr.getOperand(CCIdx).getImm())
1825 : AArch64CC::Invalid;
1826}
1827
1828static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1830 UsedNZCV UsedFlags;
1831 switch (CC) {
1832 default:
1833 break;
1834
1835 case AArch64CC::EQ: // Z set
1836 case AArch64CC::NE: // Z clear
1837 UsedFlags.Z = true;
1838 break;
1839
1840 case AArch64CC::HI: // Z clear and C set
1841 case AArch64CC::LS: // Z set or C clear
1842 UsedFlags.Z = true;
1843 [[fallthrough]];
1844 case AArch64CC::HS: // C set
1845 case AArch64CC::LO: // C clear
1846 UsedFlags.C = true;
1847 break;
1848
1849 case AArch64CC::MI: // N set
1850 case AArch64CC::PL: // N clear
1851 UsedFlags.N = true;
1852 break;
1853
1854 case AArch64CC::VS: // V set
1855 case AArch64CC::VC: // V clear
1856 UsedFlags.V = true;
1857 break;
1858
1859 case AArch64CC::GT: // Z clear, N and V the same
1860 case AArch64CC::LE: // Z set, N and V differ
1861 UsedFlags.Z = true;
1862 [[fallthrough]];
1863 case AArch64CC::GE: // N and V the same
1864 case AArch64CC::LT: // N and V differ
1865 UsedFlags.N = true;
1866 UsedFlags.V = true;
1867 break;
1868 }
1869 return UsedFlags;
1870}
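// For example (illustrative): AArch64CC::GT tests "Z clear, and N equal to V",
// so getUsedNZCV(AArch64CC::GT) reports Z, N and V as used and leaves C clear.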
1871
1872/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1873/// flags are not alive in the successors of the common parent block of
1874/// \p CmpInstr and \p MI; \returns std::nullopt otherwise.
1875///
1876/// Collects the instructions using those flags in \p CCUseInstrs if provided.
1877std::optional<UsedNZCV>
1878llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1879 const TargetRegisterInfo &TRI,
1880 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1881 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1882 if (MI.getParent() != CmpParent)
1883 return std::nullopt;
1884
1885 if (areCFlagsAliveInSuccessors(CmpParent))
1886 return std::nullopt;
1887
1888 UsedNZCV NZCVUsedAfterCmp;
1889 for (MachineInstr &Instr : instructionsWithoutDebug(
1890 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1891 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1892 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1893 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1894 return std::nullopt;
1895 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1896 if (CCUseInstrs)
1897 CCUseInstrs->push_back(&Instr);
1898 }
1899 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1900 break;
1901 }
1902 return NZCVUsedAfterCmp;
1903}
1904
1905static bool isADDSRegImm(unsigned Opcode) {
1906 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1907}
1908
1909static bool isSUBSRegImm(unsigned Opcode) {
1910 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1911}
1912
1913/// Check if CmpInstr can be substituted by MI.
1914///
1915/// CmpInstr can be substituted:
1916/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1917/// - and, MI and CmpInstr are from the same MachineBB
1918/// - and, condition flags are not alive in successors of the CmpInstr parent
1919/// - and, if MI opcode is the S form, there must be no defs of flags between
1920/// MI and CmpInstr;
1921/// if MI opcode is not the S form, there must be neither defs nor uses
1922/// of flags between MI and CmpInstr.
1923/// - and, the C flag is not used after CmpInstr, and the V flag is either not
1924/// used after CmpInstr or MI produces a poison value if signed overflow
1925/// occurs (i.e. MI has the no-signed-wrap flag).
1926static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1927 const TargetRegisterInfo &TRI) {
1928 // NOTE this assertion guarantees that MI.getOpcode() is an add or a
1929 // subtraction that may or may not set flags.
1930 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1931
1932 const unsigned CmpOpcode = CmpInstr.getOpcode();
1933 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1934 return false;
1935
1936 assert((CmpInstr.getOperand(2).isImm() &&
1937 CmpInstr.getOperand(2).getImm() == 0) &&
1938 "Caller guarantees that CmpInstr compares with constant 0");
1939
1940 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1941 if (!NZVCUsed || NZVCUsed->C)
1942 return false;
1943
1944 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1945 // '%vreg = add ...' or '%vreg = sub ...'.
1946 // Condition flag V is used to indicate signed overflow.
1947 // 1) MI and CmpInstr set N and V to the same value.
1948 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1949 // signed overflow occurs, so CmpInstr could still be simplified away.
1950 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1951 return false;
1952
1953 AccessKind AccessToCheck = AK_Write;
1954 if (sForm(MI) != MI.getOpcode())
1955 AccessToCheck = AK_All;
1956 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1957}
1958
1959/// Substitute an instruction comparing to zero with another instruction
1960/// which produces needed condition flags.
1961///
1962/// Return true on success.
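/// A minimal example of the intended rewrite (illustrative sketch; it assumes
/// only the Z flag is consumed afterwards):
/// \code
///   sub  w8, w9, w10
///   cmp  w8, #0
///   b.eq <bb>
/// \endcode
/// to
/// \code
///   subs w8, w9, w10
///   b.eq <bb>
/// \endcode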
1963bool AArch64InstrInfo::substituteCmpToZero(
1964 MachineInstr &CmpInstr, unsigned SrcReg,
1965 const MachineRegisterInfo &MRI) const {
1966 // Get the unique definition of SrcReg.
1967 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1968 if (!MI)
1969 return false;
1970
1971 const TargetRegisterInfo &TRI = getRegisterInfo();
1972
1973 unsigned NewOpc = sForm(*MI);
1974 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1975 return false;
1976
1977 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1978 return false;
1979
1980 // Update the instruction to set NZCV.
1981 MI->setDesc(get(NewOpc));
1982 CmpInstr.eraseFromParent();
1983 bool succeeded = UpdateOperandRegClass(*MI);
1984 (void)succeeded;
1985 assert(succeeded && "Some operands reg class are incompatible!");
1986 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1987 return true;
1988}
1989
1990/// \returns True if \p CmpInstr can be removed.
1991///
1992/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1993/// codes used in \p CCUseInstrs must be inverted.
1994static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1995 int CmpValue, const TargetRegisterInfo &TRI,
1996 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1997 bool &IsInvertCC) {
1998 assert((CmpValue == 0 || CmpValue == 1) &&
1999 "Only comparisons to 0 or 1 considered for removal!");
2000
2001 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2002 unsigned MIOpc = MI.getOpcode();
2003 if (MIOpc == AArch64::CSINCWr) {
2004 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2005 MI.getOperand(2).getReg() != AArch64::WZR)
2006 return false;
2007 } else if (MIOpc == AArch64::CSINCXr) {
2008 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2009 MI.getOperand(2).getReg() != AArch64::XZR)
2010 return false;
2011 } else {
2012 return false;
2013 }
2014 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2015 if (MICC == AArch64CC::Invalid)
2016 return false;
2017
2018 // NZCV needs to be defined
2019 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2020 return false;
2021
2022 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2023 const unsigned CmpOpcode = CmpInstr.getOpcode();
2024 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2025 if (CmpValue && !IsSubsRegImm)
2026 return false;
2027 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2028 return false;
2029
2030 // MI conditions allowed: eq, ne, mi, pl
2031 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2032 if (MIUsedNZCV.C || MIUsedNZCV.V)
2033 return false;
2034
2035 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2036 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2037 // Condition flags are not used in the successors of the CmpInstr basic block,
2038 // and only the Z or N flags are allowed to be used after CmpInstr within it.
2039 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2040 return false;
2041 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2042 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2043 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2044 return false;
2045 // If CmpInstr is a comparison to zero, the MI condition is limited to eq or ne.
2046 if (MIUsedNZCV.N && !CmpValue)
2047 return false;
2048
2049 // There must be no defs of flags between MI and CmpInstr
2050 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2051 return false;
2052
2053 // Condition code is inverted in the following cases:
2054 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2055 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2056 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2057 (!CmpValue && MICC == AArch64CC::NE);
2058 return true;
2059}
2060
2061/// Remove comparison in csinc-cmp sequence
2062///
2063/// Examples:
2064/// 1. \code
2065/// csinc w9, wzr, wzr, ne
2066/// cmp w9, #0
2067/// b.eq
2068/// \endcode
2069/// to
2070/// \code
2071/// csinc w9, wzr, wzr, ne
2072/// b.ne
2073/// \endcode
2074///
2075/// 2. \code
2076/// csinc x2, xzr, xzr, mi
2077/// cmp x2, #1
2078/// b.pl
2079/// \endcode
2080/// to
2081/// \code
2082/// csinc x2, xzr, xzr, mi
2083/// b.pl
2084/// \endcode
2085///
2086/// \param CmpInstr comparison instruction
2087/// \return True when comparison removed
2088bool AArch64InstrInfo::removeCmpToZeroOrOne(
2089 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2090 const MachineRegisterInfo &MRI) const {
2091 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2092 if (!MI)
2093 return false;
2094 const TargetRegisterInfo &TRI = getRegisterInfo();
2095 SmallVector<MachineInstr *, 4> CCUseInstrs;
2096 bool IsInvertCC = false;
2097 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2098 IsInvertCC))
2099 return false;
2100 // Make transformation
2101 CmpInstr.eraseFromParent();
2102 if (IsInvertCC) {
2103 // Invert condition codes in CmpInstr CC users
2104 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2105 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2106 assert(Idx >= 0 && "Unexpected instruction using CC.");
2107 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2108 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2109 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2110 CCOperand.setImm(CCUse);
2111 }
2112 }
2113 return true;
2114}
2115
2116bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2117 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2118 MI.getOpcode() != AArch64::CATCHRET)
2119 return false;
2120
2121 MachineBasicBlock &MBB = *MI.getParent();
2122 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2123 auto TRI = Subtarget.getRegisterInfo();
2124 DebugLoc DL = MI.getDebugLoc();
2125
2126 if (MI.getOpcode() == AArch64::CATCHRET) {
2127 // Skip to the first instruction before the epilog.
2128 const TargetInstrInfo *TII =
2129 MBB.getParent()->getSubtarget().getInstrInfo();
2130 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2131 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
2132 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2133 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2134 FirstEpilogSEH != MBB.begin())
2135 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2136 if (FirstEpilogSEH != MBB.begin())
2137 FirstEpilogSEH = std::next(FirstEpilogSEH);
2138 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2139 .addReg(AArch64::X0, RegState::Define)
2140 .addMBB(TargetMBB);
2141 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2142 .addReg(AArch64::X0, RegState::Define)
2143 .addReg(AArch64::X0)
2144 .addMBB(TargetMBB)
2145 .addImm(0);
2146 TargetMBB->setMachineBlockAddressTaken();
2147 return true;
2148 }
2149
2150 Register Reg = MI.getOperand(0).getReg();
2151 const Module &M = *MBB.getParent()->getFunction().getParent();
2152 if (M.getStackProtectorGuard() == "sysreg") {
2153 const AArch64SysReg::SysReg *SrcReg =
2154 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2155 if (!SrcReg)
2156 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2157
2158 // mrs xN, sysreg
2159 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2160 .addDef(Reg)
2161 .addImm(SrcReg->Encoding);
2162 int Offset = M.getStackProtectorGuardOffset();
2163 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2164 // ldr xN, [xN, #offset]
2165 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2166 .addDef(Reg)
2167 .addUse(Reg, RegState::Kill)
2168 .addImm(Offset / 8);
2169 } else if (Offset >= -256 && Offset <= 255) {
2170 // ldur xN, [xN, #offset]
2171 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2172 .addDef(Reg)
2173 .addUse(Reg, RegState::Kill)
2174 .addImm(Offset);
2175 } else if (Offset >= -4095 && Offset <= 4095) {
2176 if (Offset > 0) {
2177 // add xN, xN, #offset
2178 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2179 .addDef(Reg)
2180 .addUse(Reg, RegState::Kill)
2181 .addImm(Offset)
2182 .addImm(0);
2183 } else {
2184 // sub xN, xN, #offset
2185 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2186 .addDef(Reg)
2187 .addUse(Reg, RegState::Kill)
2188 .addImm(-Offset)
2189 .addImm(0);
2190 }
2191 // ldr xN, [xN]
2192 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2193 .addDef(Reg)
2194 .addUse(Reg, RegState::Kill)
2195 .addImm(0);
2196 } else {
2197 // Cases that are outside +/- 4095 and either not a multiple of 8 or larger
2198 // than 32760.
2199 // It might be nice to use AArch64::MOVi32imm here, which would get
2200 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2201 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2202 // AArch64FrameLowering might help us find such a scratch register
2203 // though. If we failed to find a scratch register, we could emit a
2204 // stream of add instructions to build up the immediate. Or, we could try
2205 // to insert a AArch64::MOVi32imm before register allocation so that we
2206 // didn't need to scavenge for a scratch register.
2207 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2208 }
2209 MBB.erase(MI);
2210 return true;
2211 }
2212
2213 const GlobalValue *GV =
2214 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2215 const TargetMachine &TM = MBB.getParent()->getTarget();
2216 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2217 const unsigned char MO_NC = AArch64II::MO_NC;
2218
2219 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2220 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2221 .addGlobalAddress(GV, 0, OpFlags);
2222 if (Subtarget.isTargetILP32()) {
2223 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2224 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2225 .addDef(Reg32, RegState::Dead)
2226 .addUse(Reg, RegState::Kill)
2227 .addImm(0)
2228 .addMemOperand(*MI.memoperands_begin())
2229 .addDef(Reg, RegState::Implicit);
2230 } else {
2231 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2232 .addReg(Reg, RegState::Kill)
2233 .addImm(0)
2234 .addMemOperand(*MI.memoperands_begin());
2235 }
2236 } else if (TM.getCodeModel() == CodeModel::Large) {
2237 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2238 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2239 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2240 .addImm(0);
2241 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2242 .addReg(Reg, RegState::Kill)
2243 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2244 .addImm(16);
2245 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2246 .addReg(Reg, RegState::Kill)
2247 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2248 .addImm(32);
2249 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2250 .addReg(Reg, RegState::Kill)
2251 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2252 .addImm(48);
2253 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2254 .addReg(Reg, RegState::Kill)
2255 .addImm(0)
2256 .addMemOperand(*MI.memoperands_begin());
2257 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2258 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2259 .addGlobalAddress(GV, 0, OpFlags);
2260 } else {
2261 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2262 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2263 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2264 if (Subtarget.isTargetILP32()) {
2265 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2266 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2267 .addDef(Reg32, RegState::Dead)
2268 .addUse(Reg, RegState::Kill)
2269 .addGlobalAddress(GV, 0, LoFlags)
2270 .addMemOperand(*MI.memoperands_begin())
2271 .addDef(Reg, RegState::Implicit);
2272 } else {
2273 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2274 .addReg(Reg, RegState::Kill)
2275 .addGlobalAddress(GV, 0, LoFlags)
2276 .addMemOperand(*MI.memoperands_begin());
2277 }
2278 }
2279
2280 MBB.erase(MI);
2281
2282 return true;
2283}
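// A minimal standalone sketch of the offset classification used by the
// stack-guard "sysreg" path above; the enum and function names here are
// illustrative only, not LLVM API.
enum class GuardLoadForm { LdrScaledImm, Ldur, AddOrSubThenLdr, Unsupported };

static GuardLoadForm classifyGuardOffset(long long Offset) {
  if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0)
    return GuardLoadForm::LdrScaledImm;    // ldr xN, [xN, #Offset]
  if (Offset >= -256 && Offset <= 255)
    return GuardLoadForm::Ldur;            // ldur xN, [xN, #Offset]
  if (Offset >= -4095 && Offset <= 4095)
    return GuardLoadForm::AddOrSubThenLdr; // add/sub xN, xN, #|Offset|; ldr xN, [xN]
  return GuardLoadForm::Unsupported;       // the code above reports a fatal error
}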
2284
2285// Return true if this instruction simply sets its single destination register
2286// to zero. This is equivalent to a register rename of the zero-register.
2287static bool isGPRZero(const MachineInstr &MI) {
2288 switch (MI.getOpcode()) {
2289 default:
2290 break;
2291 case AArch64::MOVZWi:
2292 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2293 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2294 assert(MI.getDesc().getNumOperands() == 3 &&
2295 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2296 return true;
2297 }
2298 break;
2299 case AArch64::ANDWri: // and Rd, Rzr, #imm
2300 return MI.getOperand(1).getReg() == AArch64::WZR;
2301 case AArch64::ANDXri:
2302 return MI.getOperand(1).getReg() == AArch64::XZR;
2303 case TargetOpcode::COPY:
2304 return MI.getOperand(1).getReg() == AArch64::WZR;
2305 }
2306 return false;
2307}
2308
2309// Return true if this instruction simply renames a general register without
2310// modifying bits.
2311static bool isGPRCopy(const MachineInstr &MI) {
2312 switch (MI.getOpcode()) {
2313 default:
2314 break;
2315 case TargetOpcode::COPY: {
2316 // GPR32 and GPR64 copies will be lowered to ORRWrs / ORRXrs.
2317 Register DstReg = MI.getOperand(0).getReg();
2318 return (AArch64::GPR32RegClass.contains(DstReg) ||
2319 AArch64::GPR64RegClass.contains(DstReg));
2320 }
2321 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2322 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2323 assert(MI.getDesc().getNumOperands() == 4 &&
2324 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2325 return true;
2326 }
2327 break;
2328 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2329 if (MI.getOperand(2).getImm() == 0) {
2330 assert(MI.getDesc().getNumOperands() == 4 &&
2331 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2332 return true;
2333 }
2334 break;
2335 }
2336 return false;
2337}
2338
2339// Return true if this instruction simply renames a floating-point register
2340// without modifying bits.
2341static bool isFPRCopy(const MachineInstr &MI) {
2342 switch (MI.getOpcode()) {
2343 default:
2344 break;
2345 case TargetOpcode::COPY: {
2346 Register DstReg = MI.getOperand(0).getReg();
2347 return AArch64::FPR128RegClass.contains(DstReg);
2348 }
2349 case AArch64::ORRv16i8:
2350 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2351 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2352 "invalid ORRv16i8 operands");
2353 return true;
2354 }
2355 break;
2356 }
2357 return false;
2358}
2359
2360Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2361 int &FrameIndex) const {
2362 switch (MI.getOpcode()) {
2363 default:
2364 break;
2365 case AArch64::LDRWui:
2366 case AArch64::LDRXui:
2367 case AArch64::LDRBui:
2368 case AArch64::LDRHui:
2369 case AArch64::LDRSui:
2370 case AArch64::LDRDui:
2371 case AArch64::LDRQui:
2372 case AArch64::LDR_PXI:
2373 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2374 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2375 FrameIndex = MI.getOperand(1).getIndex();
2376 return MI.getOperand(0).getReg();
2377 }
2378 break;
2379 }
2380
2381 return 0;
2382}
2383
2384Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2385 int &FrameIndex) const {
2386 switch (MI.getOpcode()) {
2387 default:
2388 break;
2389 case AArch64::STRWui:
2390 case AArch64::STRXui:
2391 case AArch64::STRBui:
2392 case AArch64::STRHui:
2393 case AArch64::STRSui:
2394 case AArch64::STRDui:
2395 case AArch64::STRQui:
2396 case AArch64::STR_PXI:
2397 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2398 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2399 FrameIndex = MI.getOperand(1).getIndex();
2400 return MI.getOperand(0).getReg();
2401 }
2402 break;
2403 }
2404 return 0;
2405}
2406
2407/// Check all MachineMemOperands for a hint to suppress pairing.
2408bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2409 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2410 return MMO->getFlags() & MOSuppressPair;
2411 });
2412}
2413
2414/// Set a flag on the first MachineMemOperand to suppress pairing.
2415void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2416 if (MI.memoperands_empty())
2417 return;
2418 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2419}
2420
2421/// Check all MachineMemOperands for a hint that the load/store is strided.
2422bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2423 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2424 return MMO->getFlags() & MOStridedAccess;
2425 });
2426}
2427
2428bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2429 switch (Opc) {
2430 default:
2431 return false;
2432 case AArch64::STURSi:
2433 case AArch64::STRSpre:
2434 case AArch64::STURDi:
2435 case AArch64::STRDpre:
2436 case AArch64::STURQi:
2437 case AArch64::STRQpre:
2438 case AArch64::STURBBi:
2439 case AArch64::STURHHi:
2440 case AArch64::STURWi:
2441 case AArch64::STRWpre:
2442 case AArch64::STURXi:
2443 case AArch64::STRXpre:
2444 case AArch64::LDURSi:
2445 case AArch64::LDRSpre:
2446 case AArch64::LDURDi:
2447 case AArch64::LDRDpre:
2448 case AArch64::LDURQi:
2449 case AArch64::LDRQpre:
2450 case AArch64::LDURWi:
2451 case AArch64::LDRWpre:
2452 case AArch64::LDURXi:
2453 case AArch64::LDRXpre:
2454 case AArch64::LDRSWpre:
2455 case AArch64::LDURSWi:
2456 case AArch64::LDURHHi:
2457 case AArch64::LDURBBi:
2458 case AArch64::LDURSBWi:
2459 case AArch64::LDURSHWi:
2460 return true;
2461 }
2462}
2463
2464std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2465 switch (Opc) {
2466 default: return {};
2467 case AArch64::PRFMui: return AArch64::PRFUMi;
2468 case AArch64::LDRXui: return AArch64::LDURXi;
2469 case AArch64::LDRWui: return AArch64::LDURWi;
2470 case AArch64::LDRBui: return AArch64::LDURBi;
2471 case AArch64::LDRHui: return AArch64::LDURHi;
2472 case AArch64::LDRSui: return AArch64::LDURSi;
2473 case AArch64::LDRDui: return AArch64::LDURDi;
2474 case AArch64::LDRQui: return AArch64::LDURQi;
2475 case AArch64::LDRBBui: return AArch64::LDURBBi;
2476 case AArch64::LDRHHui: return AArch64::LDURHHi;
2477 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2478 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2479 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2480 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2481 case AArch64::LDRSWui: return AArch64::LDURSWi;
2482 case AArch64::STRXui: return AArch64::STURXi;
2483 case AArch64::STRWui: return AArch64::STURWi;
2484 case AArch64::STRBui: return AArch64::STURBi;
2485 case AArch64::STRHui: return AArch64::STURHi;
2486 case AArch64::STRSui: return AArch64::STURSi;
2487 case AArch64::STRDui: return AArch64::STURDi;
2488 case AArch64::STRQui: return AArch64::STURQi;
2489 case AArch64::STRBBui: return AArch64::STURBBi;
2490 case AArch64::STRHHui: return AArch64::STURHHi;
2491 }
2492}
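// For example (illustrative): getUnscaledLdSt(AArch64::LDRXui) returns
// AArch64::LDURXi, and an opcode with no unscaled equivalent returns
// std::nullopt.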
2493
2494unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2495 switch (Opc) {
2496 default:
2497 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2498 case AArch64::ADDG:
2499 case AArch64::LDAPURBi:
2500 case AArch64::LDAPURHi:
2501 case AArch64::LDAPURi:
2502 case AArch64::LDAPURSBWi:
2503 case AArch64::LDAPURSBXi:
2504 case AArch64::LDAPURSHWi:
2505 case AArch64::LDAPURSHXi:
2506 case AArch64::LDAPURSWi:
2507 case AArch64::LDAPURXi:
2508 case AArch64::LDR_PPXI:
2509 case AArch64::LDR_PXI:
2510 case AArch64::LDR_ZXI:
2511 case AArch64::LDR_ZZXI:
2512 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2513 case AArch64::LDR_ZZZXI:
2514 case AArch64::LDR_ZZZZXI:
2515 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2516 case AArch64::LDRBBui:
2517 case AArch64::LDRBui:
2518 case AArch64::LDRDui:
2519 case AArch64::LDRHHui:
2520 case AArch64::LDRHui:
2521 case AArch64::LDRQui:
2522 case AArch64::LDRSBWui:
2523 case AArch64::LDRSBXui:
2524 case AArch64::LDRSHWui:
2525 case AArch64::LDRSHXui:
2526 case AArch64::LDRSui:
2527 case AArch64::LDRSWui:
2528 case AArch64::LDRWui:
2529 case AArch64::LDRXui:
2530 case AArch64::LDURBBi:
2531 case AArch64::LDURBi:
2532 case AArch64::LDURDi:
2533 case AArch64::LDURHHi:
2534 case AArch64::LDURHi:
2535 case AArch64::LDURQi:
2536 case AArch64::LDURSBWi:
2537 case AArch64::LDURSBXi:
2538 case AArch64::LDURSHWi:
2539 case AArch64::LDURSHXi:
2540 case AArch64::LDURSi:
2541 case AArch64::LDURSWi:
2542 case AArch64::LDURWi:
2543 case AArch64::LDURXi:
2544 case AArch64::PRFMui:
2545 case AArch64::PRFUMi:
2546 case AArch64::ST2Gi:
2547 case AArch64::STGi:
2548 case AArch64::STLURBi:
2549 case AArch64::STLURHi:
2550 case AArch64::STLURWi:
2551 case AArch64::STLURXi:
2552 case AArch64::StoreSwiftAsyncContext:
2553 case AArch64::STR_PPXI:
2554 case AArch64::STR_PXI:
2555 case AArch64::STR_ZXI:
2556 case AArch64::STR_ZZXI:
2557 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2558 case AArch64::STR_ZZZXI:
2559 case AArch64::STR_ZZZZXI:
2560 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2561 case AArch64::STRBBui:
2562 case AArch64::STRBui:
2563 case AArch64::STRDui:
2564 case AArch64::STRHHui:
2565 case AArch64::STRHui:
2566 case AArch64::STRQui:
2567 case AArch64::STRSui:
2568 case AArch64::STRWui:
2569 case AArch64::STRXui:
2570 case AArch64::STURBBi:
2571 case AArch64::STURBi:
2572 case AArch64::STURDi:
2573 case AArch64::STURHHi:
2574 case AArch64::STURHi:
2575 case AArch64::STURQi:
2576 case AArch64::STURSi:
2577 case AArch64::STURWi:
2578 case AArch64::STURXi:
2579 case AArch64::STZ2Gi:
2580 case AArch64::STZGi:
2581 case AArch64::TAGPstack:
2582 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2583 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2584 return 2;
2585 case AArch64::LD1B_D_IMM:
2586 case AArch64::LD1B_H_IMM:
2587 case AArch64::LD1B_IMM:
2588 case AArch64::LD1B_S_IMM:
2589 case AArch64::LD1D_IMM:
2590 case AArch64::LD1H_D_IMM:
2591 case AArch64::LD1H_IMM:
2592 case AArch64::LD1H_S_IMM:
2593 case AArch64::LD1RB_D_IMM:
2594 case AArch64::LD1RB_H_IMM:
2595 case AArch64::LD1RB_IMM:
2596 case AArch64::LD1RB_S_IMM:
2597 case AArch64::LD1RD_IMM:
2598 case AArch64::LD1RH_D_IMM:
2599 case AArch64::LD1RH_IMM:
2600 case AArch64::LD1RH_S_IMM:
2601 case AArch64::LD1RSB_D_IMM:
2602 case AArch64::LD1RSB_H_IMM:
2603 case AArch64::LD1RSB_S_IMM:
2604 case AArch64::LD1RSH_D_IMM:
2605 case AArch64::LD1RSH_S_IMM:
2606 case AArch64::LD1RSW_IMM:
2607 case AArch64::LD1RW_D_IMM:
2608 case AArch64::LD1RW_IMM:
2609 case AArch64::LD1SB_D_IMM:
2610 case AArch64::LD1SB_H_IMM:
2611 case AArch64::LD1SB_S_IMM:
2612 case AArch64::LD1SH_D_IMM:
2613 case AArch64::LD1SH_S_IMM:
2614 case AArch64::LD1SW_D_IMM:
2615 case AArch64::LD1W_D_IMM:
2616 case AArch64::LD1W_IMM:
2617 case AArch64::LD2B_IMM:
2618 case AArch64::LD2D_IMM:
2619 case AArch64::LD2H_IMM:
2620 case AArch64::LD2W_IMM:
2621 case AArch64::LD3B_IMM:
2622 case AArch64::LD3D_IMM:
2623 case AArch64::LD3H_IMM:
2624 case AArch64::LD3W_IMM:
2625 case AArch64::LD4B_IMM:
2626 case AArch64::LD4D_IMM:
2627 case AArch64::LD4H_IMM:
2628 case AArch64::LD4W_IMM:
2629 case AArch64::LDG:
2630 case AArch64::LDNF1B_D_IMM:
2631 case AArch64::LDNF1B_H_IMM:
2632 case AArch64::LDNF1B_IMM:
2633 case AArch64::LDNF1B_S_IMM:
2634 case AArch64::LDNF1D_IMM:
2635 case AArch64::LDNF1H_D_IMM:
2636 case AArch64::LDNF1H_IMM:
2637 case AArch64::LDNF1H_S_IMM:
2638 case AArch64::LDNF1SB_D_IMM:
2639 case AArch64::LDNF1SB_H_IMM:
2640 case AArch64::LDNF1SB_S_IMM:
2641 case AArch64::LDNF1SH_D_IMM:
2642 case AArch64::LDNF1SH_S_IMM:
2643 case AArch64::LDNF1SW_D_IMM:
2644 case AArch64::LDNF1W_D_IMM:
2645 case AArch64::LDNF1W_IMM:
2646 case AArch64::LDNPDi:
2647 case AArch64::LDNPQi:
2648 case AArch64::LDNPSi:
2649 case AArch64::LDNPWi:
2650 case AArch64::LDNPXi:
2651 case AArch64::LDNT1B_ZRI:
2652 case AArch64::LDNT1D_ZRI:
2653 case AArch64::LDNT1H_ZRI:
2654 case AArch64::LDNT1W_ZRI:
2655 case AArch64::LDPDi:
2656 case AArch64::LDPQi:
2657 case AArch64::LDPSi:
2658 case AArch64::LDPWi:
2659 case AArch64::LDPXi:
2660 case AArch64::LDRBBpost:
2661 case AArch64::LDRBBpre:
2662 case AArch64::LDRBpost:
2663 case AArch64::LDRBpre:
2664 case AArch64::LDRDpost:
2665 case AArch64::LDRDpre:
2666 case AArch64::LDRHHpost:
2667 case AArch64::LDRHHpre:
2668 case AArch64::LDRHpost:
2669 case AArch64::LDRHpre:
2670 case AArch64::LDRQpost:
2671 case AArch64::LDRQpre:
2672 case AArch64::LDRSpost:
2673 case AArch64::LDRSpre:
2674 case AArch64::LDRWpost:
2675 case AArch64::LDRWpre:
2676 case AArch64::LDRXpost:
2677 case AArch64::LDRXpre:
2678 case AArch64::ST1B_D_IMM:
2679 case AArch64::ST1B_H_IMM:
2680 case AArch64::ST1B_IMM:
2681 case AArch64::ST1B_S_IMM:
2682 case AArch64::ST1D_IMM:
2683 case AArch64::ST1H_D_IMM:
2684 case AArch64::ST1H_IMM:
2685 case AArch64::ST1H_S_IMM:
2686 case AArch64::ST1W_D_IMM:
2687 case AArch64::ST1W_IMM:
2688 case AArch64::ST2B_IMM:
2689 case AArch64::ST2D_IMM:
2690 case AArch64::ST2H_IMM:
2691 case AArch64::ST2W_IMM:
2692 case AArch64::ST3B_IMM:
2693 case AArch64::ST3D_IMM:
2694 case AArch64::ST3H_IMM:
2695 case AArch64::ST3W_IMM:
2696 case AArch64::ST4B_IMM:
2697 case AArch64::ST4D_IMM:
2698 case AArch64::ST4H_IMM:
2699 case AArch64::ST4W_IMM:
2700 case AArch64::STGPi:
2701 case AArch64::STGPreIndex:
2702 case AArch64::STZGPreIndex:
2703 case AArch64::ST2GPreIndex:
2704 case AArch64::STZ2GPreIndex:
2705 case AArch64::STGPostIndex:
2706 case AArch64::STZGPostIndex:
2707 case AArch64::ST2GPostIndex:
2708 case AArch64::STZ2GPostIndex:
2709 case AArch64::STNPDi:
2710 case AArch64::STNPQi:
2711 case AArch64::STNPSi:
2712 case AArch64::STNPWi:
2713 case AArch64::STNPXi:
2714 case AArch64::STNT1B_ZRI:
2715 case AArch64::STNT1D_ZRI:
2716 case AArch64::STNT1H_ZRI:
2717 case AArch64::STNT1W_ZRI:
2718 case AArch64::STPDi:
2719 case AArch64::STPQi:
2720 case AArch64::STPSi:
2721 case AArch64::STPWi:
2722 case AArch64::STPXi:
2723 case AArch64::STRBBpost:
2724 case AArch64::STRBBpre:
2725 case AArch64::STRBpost:
2726 case AArch64::STRBpre:
2727 case AArch64::STRDpost:
2728 case AArch64::STRDpre:
2729 case AArch64::STRHHpost:
2730 case AArch64::STRHHpre:
2731 case AArch64::STRHpost:
2732 case AArch64::STRHpre:
2733 case AArch64::STRQpost:
2734 case AArch64::STRQpre:
2735 case AArch64::STRSpost:
2736 case AArch64::STRSpre:
2737 case AArch64::STRWpost:
2738 case AArch64::STRWpre:
2739 case AArch64::STRXpost:
2740 case AArch64::STRXpre:
2741 return 3;
2742 case AArch64::LDPDpost:
2743 case AArch64::LDPDpre:
2744 case AArch64::LDPQpost:
2745 case AArch64::LDPQpre:
2746 case AArch64::LDPSpost:
2747 case AArch64::LDPSpre:
2748 case AArch64::LDPWpost:
2749 case AArch64::LDPWpre:
2750 case AArch64::LDPXpost:
2751 case AArch64::LDPXpre:
2752 case AArch64::STGPpre:
2753 case AArch64::STGPpost:
2754 case AArch64::STPDpost:
2755 case AArch64::STPDpre:
2756 case AArch64::STPQpost:
2757 case AArch64::STPQpre:
2758 case AArch64::STPSpost:
2759 case AArch64::STPSpre:
2760 case AArch64::STPWpost:
2761 case AArch64::STPWpre:
2762 case AArch64::STPXpost:
2763 case AArch64::STPXpre:
2764 return 4;
2765 }
2766}
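// For example (illustrative): LDRXui (ldr Xt, [Xn, #imm]) carries its
// immediate in operand 2, a pre/post-indexed LDRXpre carries it in operand 3
// because of the extra write-back destination, and LDPXpre carries it in
// operand 4.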
2767
2768bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2769 switch (MI.getOpcode()) {
2770 default:
2771 return false;
2772 // Scaled instructions.
2773 case AArch64::STRSui:
2774 case AArch64::STRDui:
2775 case AArch64::STRQui:
2776 case AArch64::STRXui:
2777 case AArch64::STRWui:
2778 case AArch64::LDRSui:
2779 case AArch64::LDRDui:
2780 case AArch64::LDRQui:
2781 case AArch64::LDRXui:
2782 case AArch64::LDRWui:
2783 case AArch64::LDRSWui:
2784 // Unscaled instructions.
2785 case AArch64::STURSi:
2786 case AArch64::STRSpre:
2787 case AArch64::STURDi:
2788 case AArch64::STRDpre:
2789 case AArch64::STURQi:
2790 case AArch64::STRQpre:
2791 case AArch64::STURWi:
2792 case AArch64::STRWpre:
2793 case AArch64::STURXi:
2794 case AArch64::STRXpre:
2795 case AArch64::LDURSi:
2796 case AArch64::LDRSpre:
2797 case AArch64::LDURDi:
2798 case AArch64::LDRDpre:
2799 case AArch64::LDURQi:
2800 case AArch64::LDRQpre:
2801 case AArch64::LDURWi:
2802 case AArch64::LDRWpre:
2803 case AArch64::LDURXi:
2804 case AArch64::LDRXpre:
2805 case AArch64::LDURSWi:
2806 case AArch64::LDRSWpre:
2807 // SVE instructions.
2808 case AArch64::LDR_ZXI:
2809 case AArch64::STR_ZXI:
2810 return true;
2811 }
2812}
2813
2814bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2815 switch (MI.getOpcode()) {
2816 default:
2817 assert((!MI.isCall() || !MI.isReturn()) &&
2818 "Unexpected instruction - was a new tail call opcode introduced?");
2819 return false;
2820 case AArch64::TCRETURNdi:
2821 case AArch64::TCRETURNri:
2822 case AArch64::TCRETURNrix16x17:
2823 case AArch64::TCRETURNrix17:
2824 case AArch64::TCRETURNrinotx16:
2825 case AArch64::TCRETURNriALL:
2826 case AArch64::AUTH_TCRETURN:
2827 case AArch64::AUTH_TCRETURN_BTI:
2828 return true;
2829 }
2830}
2831
2832unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2833 switch (Opc) {
2834 default:
2835 llvm_unreachable("Opcode has no flag setting equivalent!");
2836 // 32-bit cases:
2837 case AArch64::ADDWri:
2838 return AArch64::ADDSWri;
2839 case AArch64::ADDWrr:
2840 return AArch64::ADDSWrr;
2841 case AArch64::ADDWrs:
2842 return AArch64::ADDSWrs;
2843 case AArch64::ADDWrx:
2844 return AArch64::ADDSWrx;
2845 case AArch64::ANDWri:
2846 return AArch64::ANDSWri;
2847 case AArch64::ANDWrr:
2848 return AArch64::ANDSWrr;
2849 case AArch64::ANDWrs:
2850 return AArch64::ANDSWrs;
2851 case AArch64::BICWrr:
2852 return AArch64::BICSWrr;
2853 case AArch64::BICWrs:
2854 return AArch64::BICSWrs;
2855 case AArch64::SUBWri:
2856 return AArch64::SUBSWri;
2857 case AArch64::SUBWrr:
2858 return AArch64::SUBSWrr;
2859 case AArch64::SUBWrs:
2860 return AArch64::SUBSWrs;
2861 case AArch64::SUBWrx:
2862 return AArch64::SUBSWrx;
2863 // 64-bit cases:
2864 case AArch64::ADDXri:
2865 return AArch64::ADDSXri;
2866 case AArch64::ADDXrr:
2867 return AArch64::ADDSXrr;
2868 case AArch64::ADDXrs:
2869 return AArch64::ADDSXrs;
2870 case AArch64::ADDXrx:
2871 return AArch64::ADDSXrx;
2872 case AArch64::ANDXri:
2873 return AArch64::ANDSXri;
2874 case AArch64::ANDXrr:
2875 return AArch64::ANDSXrr;
2876 case AArch64::ANDXrs:
2877 return AArch64::ANDSXrs;
2878 case AArch64::BICXrr:
2879 return AArch64::BICSXrr;
2880 case AArch64::BICXrs:
2881 return AArch64::BICSXrs;
2882 case AArch64::SUBXri:
2883 return AArch64::SUBSXri;
2884 case AArch64::SUBXrr:
2885 return AArch64::SUBSXrr;
2886 case AArch64::SUBXrs:
2887 return AArch64::SUBSXrs;
2888 case AArch64::SUBXrx:
2889 return AArch64::SUBSXrx;
2890 // SVE instructions:
2891 case AArch64::AND_PPzPP:
2892 return AArch64::ANDS_PPzPP;
2893 case AArch64::BIC_PPzPP:
2894 return AArch64::BICS_PPzPP;
2895 case AArch64::EOR_PPzPP:
2896 return AArch64::EORS_PPzPP;
2897 case AArch64::NAND_PPzPP:
2898 return AArch64::NANDS_PPzPP;
2899 case AArch64::NOR_PPzPP:
2900 return AArch64::NORS_PPzPP;
2901 case AArch64::ORN_PPzPP:
2902 return AArch64::ORNS_PPzPP;
2903 case AArch64::ORR_PPzPP:
2904 return AArch64::ORRS_PPzPP;
2905 case AArch64::BRKA_PPzP:
2906 return AArch64::BRKAS_PPzP;
2907 case AArch64::BRKPA_PPzPP:
2908 return AArch64::BRKPAS_PPzPP;
2909 case AArch64::BRKB_PPzP:
2910 return AArch64::BRKBS_PPzP;
2911 case AArch64::BRKPB_PPzPP:
2912 return AArch64::BRKPBS_PPzPP;
2913 case AArch64::BRKN_PPzP:
2914 return AArch64::BRKNS_PPzP;
2915 case AArch64::RDFFR_PPz:
2916 return AArch64::RDFFRS_PPz;
2917 case AArch64::PTRUE_B:
2918 return AArch64::PTRUES_B;
2919 }
2920}
2921
2922// Is this a candidate for ld/st merging or pairing? For example, we don't
2923// touch volatiles or load/stores that have a hint to avoid pair formation.
2924bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2925
2926 bool IsPreLdSt = isPreLdSt(MI);
2927
2928 // If this is a volatile load/store, don't mess with it.
2929 if (MI.hasOrderedMemoryRef())
2930 return false;
2931
2932 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2933 // For Pre-inc LD/ST, the operand is shifted by one.
2934 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2935 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2936 "Expected a reg or frame index operand.");
2937
2938 // For Pre-indexed addressing quadword instructions, the third operand is the
2939 // immediate value.
2940 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2941
2942 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2943 return false;
2944
2945 // Can't merge/pair if the instruction modifies the base register.
2946 // e.g., ldr x0, [x0]
2947 // This case will never occur with an FI base.
2948 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2949 // STR<S,D,Q,W,X>pre, it can be merged.
2950 // For example:
2951 // ldr q0, [x11, #32]!
2952 // ldr q1, [x11, #16]
2953 // to
2954 // ldp q0, q1, [x11, #32]!
2955 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2956 Register BaseReg = MI.getOperand(1).getReg();
2957 const TargetRegisterInfo *TRI = &getRegisterInfo();
2958 if (MI.modifiesRegister(BaseReg, TRI))
2959 return false;
2960 }
2961
2962 // Pairing SVE fills/spills is only valid for little-endian targets that
2963 // implement VLS 128.
2964 switch (MI.getOpcode()) {
2965 default:
2966 break;
2967 case AArch64::LDR_ZXI:
2968 case AArch64::STR_ZXI:
2969 if (!Subtarget.isLittleEndian() ||
2970 Subtarget.getSVEVectorSizeInBits() != 128)
2971 return false;
2972 }
2973
2974 // Check if this load/store has a hint to avoid pair formation.
2975 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2976 if (isLdStPairSuppressed(MI))
2977 return false;
2978
2979 // Do not pair any callee-save store/reload instructions in the
2980 // prologue/epilogue if the CFI information encoded the operations as separate
2981 // instructions, as that will cause the size of the actual prologue to mismatch
2982 // with the prologue size recorded in the Windows CFI.
2983 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2984 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2985 MI.getMF()->getFunction().needsUnwindTableEntry();
2986 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2987 MI.getFlag(MachineInstr::FrameDestroy)))
2988 return false;
2989
2990 // On some CPUs quad load/store pairs are slower than two single load/stores.
2991 if (Subtarget.isPaired128Slow()) {
2992 switch (MI.getOpcode()) {
2993 default:
2994 break;
2995 case AArch64::LDURQi:
2996 case AArch64::STURQi:
2997 case AArch64::LDRQui:
2998 case AArch64::STRQui:
2999 return false;
3000 }
3001 }
3002
3003 return true;
3004}
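// For illustration (sketch): when this predicate holds for two adjacent
// accesses, the load/store optimizer may form a pair, e.g.
//   ldr x0, [sp, #8]
//   ldr x1, [sp, #16]
// to
//   ldp x0, x1, [sp, #8]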
3005
3006bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3007 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3008 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3009 const TargetRegisterInfo *TRI) const {
3010 if (!LdSt.mayLoadOrStore())
3011 return false;
3012
3013 const MachineOperand *BaseOp;
3014 TypeSize WidthN(0, false);
3015 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3016 WidthN, TRI))
3017 return false;
3018 // The maximum vscale is 16 under AArch64; return the maximal extent for the
3019 // vector.
3020 Width = LocationSize::precise(WidthN);
3021 BaseOps.push_back(BaseOp);
3022 return true;
3023}
3024
3025std::optional<ExtAddrMode>
3026AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3027 const TargetRegisterInfo *TRI) const {
3028 const MachineOperand *Base; // Filled with the base operand of MI.
3029 int64_t Offset; // Filled with the offset of MI.
3030 bool OffsetIsScalable;
3031 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3032 return std::nullopt;
3033
3034 if (!Base->isReg())
3035 return std::nullopt;
3036 ExtAddrMode AM;
3037 AM.BaseReg = Base->getReg();
3038 AM.Displacement = Offset;
3039 AM.ScaledReg = 0;
3040 AM.Scale = 0;
3041 return AM;
3042}
3043
3044bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3045 Register Reg,
3046 const MachineInstr &AddrI,
3047 ExtAddrMode &AM) const {
3048 // Filter out instructions into which we cannot fold.
3049 unsigned NumBytes;
3050 int64_t OffsetScale = 1;
3051 switch (MemI.getOpcode()) {
3052 default:
3053 return false;
3054
3055 case AArch64::LDURQi:
3056 case AArch64::STURQi:
3057 NumBytes = 16;
3058 break;
3059
3060 case AArch64::LDURDi:
3061 case AArch64::STURDi:
3062 case AArch64::LDURXi:
3063 case AArch64::STURXi:
3064 NumBytes = 8;
3065 break;
3066
3067 case AArch64::LDURWi:
3068 case AArch64::LDURSWi:
3069 case AArch64::STURWi:
3070 NumBytes = 4;
3071 break;
3072
3073 case AArch64::LDURHi:
3074 case AArch64::STURHi:
3075 case AArch64::LDURHHi:
3076 case AArch64::STURHHi:
3077 case AArch64::LDURSHXi:
3078 case AArch64::LDURSHWi:
3079 NumBytes = 2;
3080 break;
3081
3082 case AArch64::LDRBroX:
3083 case AArch64::LDRBBroX:
3084 case AArch64::LDRSBXroX:
3085 case AArch64::LDRSBWroX:
3086 case AArch64::STRBroX:
3087 case AArch64::STRBBroX:
3088 case AArch64::LDURBi:
3089 case AArch64::LDURBBi:
3090 case AArch64::LDURSBXi:
3091 case AArch64::LDURSBWi:
3092 case AArch64::STURBi:
3093 case AArch64::STURBBi:
3094 case AArch64::LDRBui:
3095 case AArch64::LDRBBui:
3096 case AArch64::LDRSBXui:
3097 case AArch64::LDRSBWui:
3098 case AArch64::STRBui:
3099 case AArch64::STRBBui:
3100 NumBytes = 1;
3101 break;
3102
3103 case AArch64::LDRQroX:
3104 case AArch64::STRQroX:
3105 case AArch64::LDRQui:
3106 case AArch64::STRQui:
3107 NumBytes = 16;
3108 OffsetScale = 16;
3109 break;
3110
3111 case AArch64::LDRDroX:
3112 case AArch64::STRDroX:
3113 case AArch64::LDRXroX:
3114 case AArch64::STRXroX:
3115 case AArch64::LDRDui:
3116 case AArch64::STRDui:
3117 case AArch64::LDRXui:
3118 case AArch64::STRXui:
3119 NumBytes = 8;
3120 OffsetScale = 8;
3121 break;
3122
3123 case AArch64::LDRWroX:
3124 case AArch64::LDRSWroX:
3125 case AArch64::STRWroX:
3126 case AArch64::LDRWui:
3127 case AArch64::LDRSWui:
3128 case AArch64::STRWui:
3129 NumBytes = 4;
3130 OffsetScale = 4;
3131 break;
3132
3133 case AArch64::LDRHroX:
3134 case AArch64::STRHroX:
3135 case AArch64::LDRHHroX:
3136 case AArch64::STRHHroX:
3137 case AArch64::LDRSHXroX:
3138 case AArch64::LDRSHWroX:
3139 case AArch64::LDRHui:
3140 case AArch64::STRHui:
3141 case AArch64::LDRHHui:
3142 case AArch64::STRHHui:
3143 case AArch64::LDRSHXui:
3144 case AArch64::LDRSHWui:
3145 NumBytes = 2;
3146 OffsetScale = 2;
3147 break;
3148 }
3149
3150 // Check the fold operand is not the loaded/stored value.
3151 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3152 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3153 return false;
3154
3155 // Handle memory instructions with a [Reg, Reg] addressing mode.
3156 if (MemI.getOperand(2).isReg()) {
3157 // Bail if the addressing mode already includes extension of the offset
3158 // register.
3159 if (MemI.getOperand(3).getImm())
3160 return false;
3161
3162 // Check if we actually have a scaled offset.
3163 if (MemI.getOperand(4).getImm() == 0)
3164 OffsetScale = 1;
3165
3166 // If the address instruction is folded into the base register, then the
3167 // addressing mode must not have a scale. Then we can swap the base and the
3168 // scaled registers.
3169 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3170 return false;
3171
3172 switch (AddrI.getOpcode()) {
3173 default:
3174 return false;
3175
3176 case AArch64::SBFMXri:
3177 // sxtw Xa, Wm
3178 // ldr Xd, [Xn, Xa, lsl #N]
3179 // ->
3180 // ldr Xd, [Xn, Wm, sxtw #N]
3181 if (AddrI.getOperand(2).getImm() != 0 ||
3182 AddrI.getOperand(3).getImm() != 31)
3183 return false;
3184
3185 AM.BaseReg = MemI.getOperand(1).getReg();
3186 if (AM.BaseReg == Reg)
3187 AM.BaseReg = MemI.getOperand(2).getReg();
3188 AM.ScaledReg = AddrI.getOperand(1).getReg();
3189 AM.Scale = OffsetScale;
3190 AM.Displacement = 0;
3191 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3192 return true;
3193
3194 case TargetOpcode::SUBREG_TO_REG: {
3195 // mov Wa, Wm
3196 // ldr Xd, [Xn, Xa, lsl #N]
3197 // ->
3198 // ldr Xd, [Xn, Wm, uxtw #N]
3199
3200 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3201 if (AddrI.getOperand(1).getImm() != 0 ||
3202 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3203 return false;
3204
3205 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3206 Register OffsetReg = AddrI.getOperand(2).getReg();
3207 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3208 return false;
3209
3210 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3211 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3212 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3213 DefMI.getOperand(3).getImm() != 0)
3214 return false;
3215
3216 AM.BaseReg = MemI.getOperand(1).getReg();
3217 if (AM.BaseReg == Reg)
3218 AM.BaseReg = MemI.getOperand(2).getReg();
3219 AM.ScaledReg = DefMI.getOperand(2).getReg();
3220 AM.Scale = OffsetScale;
3221 AM.Displacement = 0;
3222 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3223 return true;
3224 }
3225 }
3226 }
3227
3228 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3229
3230 // Check we are not breaking a potential conversion to an LDP.
3231 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3232 int64_t NewOffset) -> bool {
3233 int64_t MinOffset, MaxOffset;
3234 switch (NumBytes) {
3235 default:
3236 return true;
3237 case 4:
3238 MinOffset = -256;
3239 MaxOffset = 252;
3240 break;
3241 case 8:
3242 MinOffset = -512;
3243 MaxOffset = 504;
3244 break;
3245 case 16:
3246 MinOffset = -1024;
3247 MaxOffset = 1008;
3248 break;
3249 }
3250 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3251 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3252 };
3253 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3254 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3255 int64_t NewOffset = OldOffset + Disp;
3256 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3257 return false;
3258 // If the old offset would fit into an LDP, but the new offset wouldn't,
3259 // bail out.
3260 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3261 return false;
3262 AM.BaseReg = AddrI.getOperand(1).getReg();
3263 AM.ScaledReg = 0;
3264 AM.Scale = 0;
3265 AM.Displacement = NewOffset;
3266 AM.Form = ExtAddrMode::Formula::Basic;
3267 return true;
3268 };
3269
3270 auto canFoldAddRegIntoAddrMode =
3271 [&](int64_t Scale,
3272 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3273 if (MemI.getOperand(2).getImm() != 0)
3274 return false;
3275 if ((unsigned)Scale != Scale)
3276 return false;
3277 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3278 return false;
3279 AM.BaseReg = AddrI.getOperand(1).getReg();
3280 AM.ScaledReg = AddrI.getOperand(2).getReg();
3281 AM.Scale = Scale;
3282 AM.Displacement = 0;
3283 AM.Form = Form;
3284 return true;
3285 };
3286
3287 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3288 unsigned Opcode = MemI.getOpcode();
3289 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3290 Subtarget.isSTRQroSlow();
3291 };
3292
3293 int64_t Disp = 0;
3294 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3295 switch (AddrI.getOpcode()) {
3296 default:
3297 return false;
3298
3299 case AArch64::ADDXri:
3300 // add Xa, Xn, #N
3301 // ldr Xd, [Xa, #M]
3302 // ->
3303 // ldr Xd, [Xn, #N'+M]
3304 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3305 return canFoldAddSubImmIntoAddrMode(Disp);
3306
3307 case AArch64::SUBXri:
3308 // sub Xa, Xn, #N
3309 // ldr Xd, [Xa, #M]
3310 // ->
3311 // ldr Xd, [Xn, #N'+M]
3312 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3313 return canFoldAddSubImmIntoAddrMode(-Disp);
3314
3315 case AArch64::ADDXrs: {
3316 // add Xa, Xn, Xm, lsl #N
3317 // ldr Xd, [Xa]
3318 // ->
3319 // ldr Xd, [Xn, Xm, lsl #N]
3320
3321 // Don't fold the add if the result would be slower, unless optimising for
3322 // size.
3323 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3324 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3325 return false;
3326 Shift = AArch64_AM::getShiftValue(Shift);
3327 if (!OptSize) {
3328 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3329 return false;
3330 if (avoidSlowSTRQ(MemI))
3331 return false;
3332 }
3333 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3334 }
3335
3336 case AArch64::ADDXrr:
3337 // add Xa, Xn, Xm
3338 // ldr Xd, [Xa]
3339 // ->
3340 // ldr Xd, [Xn, Xm, lsl #0]
3341
3342 // Don't fold the add if the result would be slower, unless optimising for
3343 // size.
3344 if (!OptSize && avoidSlowSTRQ(MemI))
3345 return false;
3346 return canFoldAddRegIntoAddrMode(1);
3347
3348 case AArch64::ADDXrx:
3349 // add Xa, Xn, Wm, {s,u}xtw #N
3350 // ldr Xd, [Xa]
3351 // ->
3352 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3353
3354 // Don't fold the add if the result would be slower, unless optimising for
3355 // size.
3356 if (!OptSize && avoidSlowSTRQ(MemI))
3357 return false;
3358
3359 // Can fold only sign-/zero-extend of a word.
3360 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3361 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3362 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3363 return false;
3364
3365 return canFoldAddRegIntoAddrMode(
3366 1ULL << AArch64_AM::getArithShiftValue(Imm),
3367 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3368 : ExtAddrMode::Formula::ZExtScaledReg);
3369 }
3370}
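// A standalone sketch of the LDP/STP immediate range behind validateOffsetForLDP
// above: the pair immediate is a signed 7-bit field scaled by the access size.
// The function name is illustrative only; unlike validateOffsetForLDP, sizes
// with no pair form simply fail here instead of being left unconstrained.
static bool fitsLdpImmediate(unsigned NumBytes, long long ByteOffset) {
  long long Scale;
  switch (NumBytes) {
  case 4:  Scale = 4;  break;  // ldp/stp of 32-bit registers
  case 8:  Scale = 8;  break;  // ldp/stp of 64-bit registers
  case 16: Scale = 16; break;  // ldp/stp of Q registers
  default: return false;       // no pair form for this access size
  }
  if (ByteOffset % Scale != 0)
    return false;
  long long Imm = ByteOffset / Scale;
  return Imm >= -64 && Imm <= 63; // signed 7-bit field
}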
3371
3372// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3373// return the opcode of an instruction performing the same operation, but using
3374// the [Reg, Reg] addressing mode.
3375static unsigned regOffsetOpcode(unsigned Opcode) {
3376 switch (Opcode) {
3377 default:
3378 llvm_unreachable("Address folding not implemented for instruction");
3379
3380 case AArch64::LDURQi:
3381 case AArch64::LDRQui:
3382 return AArch64::LDRQroX;
3383 case AArch64::STURQi:
3384 case AArch64::STRQui:
3385 return AArch64::STRQroX;
3386 case AArch64::LDURDi:
3387 case AArch64::LDRDui:
3388 return AArch64::LDRDroX;
3389 case AArch64::STURDi:
3390 case AArch64::STRDui:
3391 return AArch64::STRDroX;
3392 case AArch64::LDURXi:
3393 case AArch64::LDRXui:
3394 return AArch64::LDRXroX;
3395 case AArch64::STURXi:
3396 case AArch64::STRXui:
3397 return AArch64::STRXroX;
3398 case AArch64::LDURWi:
3399 case AArch64::LDRWui:
3400 return AArch64::LDRWroX;
3401 case AArch64::LDURSWi:
3402 case AArch64::LDRSWui:
3403 return AArch64::LDRSWroX;
3404 case AArch64::STURWi:
3405 case AArch64::STRWui:
3406 return AArch64::STRWroX;
3407 case AArch64::LDURHi:
3408 case AArch64::LDRHui:
3409 return AArch64::LDRHroX;
3410 case AArch64::STURHi:
3411 case AArch64::STRHui:
3412 return AArch64::STRHroX;
3413 case AArch64::LDURHHi:
3414 case AArch64::LDRHHui:
3415 return AArch64::LDRHHroX;
3416 case AArch64::STURHHi:
3417 case AArch64::STRHHui:
3418 return AArch64::STRHHroX;
3419 case AArch64::LDURSHXi:
3420 case AArch64::LDRSHXui:
3421 return AArch64::LDRSHXroX;
3422 case AArch64::LDURSHWi:
3423 case AArch64::LDRSHWui:
3424 return AArch64::LDRSHWroX;
3425 case AArch64::LDURBi:
3426 case AArch64::LDRBui:
3427 return AArch64::LDRBroX;
3428 case AArch64::LDURBBi:
3429 case AArch64::LDRBBui:
3430 return AArch64::LDRBBroX;
3431 case AArch64::LDURSBXi:
3432 case AArch64::LDRSBXui:
3433 return AArch64::LDRSBXroX;
3434 case AArch64::LDURSBWi:
3435 case AArch64::LDRSBWui:
3436 return AArch64::LDRSBWroX;
3437 case AArch64::STURBi:
3438 case AArch64::STRBui:
3439 return AArch64::STRBroX;
3440 case AArch64::STURBBi:
3441 case AArch64::STRBBui:
3442 return AArch64::STRBBroX;
3443 }
3444}
3445
3446// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3447// the opcode of an instruction performing the same operation, but using the
3448// [Reg, #Imm] addressing mode with scaled offset.
3449unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3450 switch (Opcode) {
3451 default:
3452 llvm_unreachable("Address folding not implemented for instruction");
3453
3454 case AArch64::LDURQi:
3455 Scale = 16;
3456 return AArch64::LDRQui;
3457 case AArch64::STURQi:
3458 Scale = 16;
3459 return AArch64::STRQui;
3460 case AArch64::LDURDi:
3461 Scale = 8;
3462 return AArch64::LDRDui;
3463 case AArch64::STURDi:
3464 Scale = 8;
3465 return AArch64::STRDui;
3466 case AArch64::LDURXi:
3467 Scale = 8;
3468 return AArch64::LDRXui;
3469 case AArch64::STURXi:
3470 Scale = 8;
3471 return AArch64::STRXui;
3472 case AArch64::LDURWi:
3473 Scale = 4;
3474 return AArch64::LDRWui;
3475 case AArch64::LDURSWi:
3476 Scale = 4;
3477 return AArch64::LDRSWui;
3478 case AArch64::STURWi:
3479 Scale = 4;
3480 return AArch64::STRWui;
3481 case AArch64::LDURHi:
3482 Scale = 2;
3483 return AArch64::LDRHui;
3484 case AArch64::STURHi:
3485 Scale = 2;
3486 return AArch64::STRHui;
3487 case AArch64::LDURHHi:
3488 Scale = 2;
3489 return AArch64::LDRHHui;
3490 case AArch64::STURHHi:
3491 Scale = 2;
3492 return AArch64::STRHHui;
3493 case AArch64::LDURSHXi:
3494 Scale = 2;
3495 return AArch64::LDRSHXui;
3496 case AArch64::LDURSHWi:
3497 Scale = 2;
3498 return AArch64::LDRSHWui;
3499 case AArch64::LDURBi:
3500 Scale = 1;
3501 return AArch64::LDRBui;
3502 case AArch64::LDURBBi:
3503 Scale = 1;
3504 return AArch64::LDRBBui;
3505 case AArch64::LDURSBXi:
3506 Scale = 1;
3507 return AArch64::LDRSBXui;
3508 case AArch64::LDURSBWi:
3509 Scale = 1;
3510 return AArch64::LDRSBWui;
3511 case AArch64::STURBi:
3512 Scale = 1;
3513 return AArch64::STRBui;
3514 case AArch64::STURBBi:
3515 Scale = 1;
3516 return AArch64::STRBBui;
3517 case AArch64::LDRQui:
3518 case AArch64::STRQui:
3519 Scale = 16;
3520 return Opcode;
3521 case AArch64::LDRDui:
3522 case AArch64::STRDui:
3523 case AArch64::LDRXui:
3524 case AArch64::STRXui:
3525 Scale = 8;
3526 return Opcode;
3527 case AArch64::LDRWui:
3528 case AArch64::LDRSWui:
3529 case AArch64::STRWui:
3530 Scale = 4;
3531 return Opcode;
3532 case AArch64::LDRHui:
3533 case AArch64::STRHui:
3534 case AArch64::LDRHHui:
3535 case AArch64::STRHHui:
3536 case AArch64::LDRSHXui:
3537 case AArch64::LDRSHWui:
3538 Scale = 2;
3539 return Opcode;
3540 case AArch64::LDRBui:
3541 case AArch64::LDRBBui:
3542 case AArch64::LDRSBXui:
3543 case AArch64::LDRSBWui:
3544 case AArch64::STRBui:
3545 case AArch64::STRBBui:
3546 Scale = 1;
3547 return Opcode;
3548 }
3549}
3550
3551// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3552// the opcode of an instruction performing the same operation, but using the
3553// [Reg, #Imm] addressing mode with unscaled offset.
3554unsigned unscaledOffsetOpcode(unsigned Opcode) {
3555 switch (Opcode) {
3556 default:
3557 llvm_unreachable("Address folding not implemented for instruction");
3558
3559 case AArch64::LDURQi:
3560 case AArch64::STURQi:
3561 case AArch64::LDURDi:
3562 case AArch64::STURDi:
3563 case AArch64::LDURXi:
3564 case AArch64::STURXi:
3565 case AArch64::LDURWi:
3566 case AArch64::LDURSWi:
3567 case AArch64::STURWi:
3568 case AArch64::LDURHi:
3569 case AArch64::STURHi:
3570 case AArch64::LDURHHi:
3571 case AArch64::STURHHi:
3572 case AArch64::LDURSHXi:
3573 case AArch64::LDURSHWi:
3574 case AArch64::LDURBi:
3575 case AArch64::STURBi:
3576 case AArch64::LDURBBi:
3577 case AArch64::STURBBi:
3578 case AArch64::LDURSBWi:
3579 case AArch64::LDURSBXi:
3580 return Opcode;
3581 case AArch64::LDRQui:
3582 return AArch64::LDURQi;
3583 case AArch64::STRQui:
3584 return AArch64::STURQi;
3585 case AArch64::LDRDui:
3586 return AArch64::LDURDi;
3587 case AArch64::STRDui:
3588 return AArch64::STURDi;
3589 case AArch64::LDRXui:
3590 return AArch64::LDURXi;
3591 case AArch64::STRXui:
3592 return AArch64::STURXi;
3593 case AArch64::LDRWui:
3594 return AArch64::LDURWi;
3595 case AArch64::LDRSWui:
3596 return AArch64::LDURSWi;
3597 case AArch64::STRWui:
3598 return AArch64::STURWi;
3599 case AArch64::LDRHui:
3600 return AArch64::LDURHi;
3601 case AArch64::STRHui:
3602 return AArch64::STURHi;
3603 case AArch64::LDRHHui:
3604 return AArch64::LDURHHi;
3605 case AArch64::STRHHui:
3606 return AArch64::STURHHi;
3607 case AArch64::LDRSHXui:
3608 return AArch64::LDURSHXi;
3609 case AArch64::LDRSHWui:
3610 return AArch64::LDURSHWi;
3611 case AArch64::LDRBBui:
3612 return AArch64::LDURBBi;
3613 case AArch64::LDRBui:
3614 return AArch64::LDURBi;
3615 case AArch64::STRBBui:
3616 return AArch64::STURBBi;
3617 case AArch64::STRBui:
3618 return AArch64::STURBi;
3619 case AArch64::LDRSBWui:
3620 return AArch64::LDURSBWi;
3621 case AArch64::LDRSBXui:
3622 return AArch64::LDURSBXi;
3623 }
3624}
3625
3626// Given the opcode of a memory load/store instruction, return the opcode of an
3627// instruction performing the same operation, but using
3628// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3629// offset register.
3630static unsigned offsetExtendOpcode(unsigned Opcode) {
3631 switch (Opcode) {
3632 default:
3633 llvm_unreachable("Address folding not implemented for instruction");
3634
3635 case AArch64::LDRQroX:
3636 case AArch64::LDURQi:
3637 case AArch64::LDRQui:
3638 return AArch64::LDRQroW;
3639 case AArch64::STRQroX:
3640 case AArch64::STURQi:
3641 case AArch64::STRQui:
3642 return AArch64::STRQroW;
3643 case AArch64::LDRDroX:
3644 case AArch64::LDURDi:
3645 case AArch64::LDRDui:
3646 return AArch64::LDRDroW;
3647 case AArch64::STRDroX:
3648 case AArch64::STURDi:
3649 case AArch64::STRDui:
3650 return AArch64::STRDroW;
3651 case AArch64::LDRXroX:
3652 case AArch64::LDURXi:
3653 case AArch64::LDRXui:
3654 return AArch64::LDRXroW;
3655 case AArch64::STRXroX:
3656 case AArch64::STURXi:
3657 case AArch64::STRXui:
3658 return AArch64::STRXroW;
3659 case AArch64::LDRWroX:
3660 case AArch64::LDURWi:
3661 case AArch64::LDRWui:
3662 return AArch64::LDRWroW;
3663 case AArch64::LDRSWroX:
3664 case AArch64::LDURSWi:
3665 case AArch64::LDRSWui:
3666 return AArch64::LDRSWroW;
3667 case AArch64::STRWroX:
3668 case AArch64::STURWi:
3669 case AArch64::STRWui:
3670 return AArch64::STRWroW;
3671 case AArch64::LDRHroX:
3672 case AArch64::LDURHi:
3673 case AArch64::LDRHui:
3674 return AArch64::LDRHroW;
3675 case AArch64::STRHroX:
3676 case AArch64::STURHi:
3677 case AArch64::STRHui:
3678 return AArch64::STRHroW;
3679 case AArch64::LDRHHroX:
3680 case AArch64::LDURHHi:
3681 case AArch64::LDRHHui:
3682 return AArch64::LDRHHroW;
3683 case AArch64::STRHHroX:
3684 case AArch64::STURHHi:
3685 case AArch64::STRHHui:
3686 return AArch64::STRHHroW;
3687 case AArch64::LDRSHXroX:
3688 case AArch64::LDURSHXi:
3689 case AArch64::LDRSHXui:
3690 return AArch64::LDRSHXroW;
3691 case AArch64::LDRSHWroX:
3692 case AArch64::LDURSHWi:
3693 case AArch64::LDRSHWui:
3694 return AArch64::LDRSHWroW;
3695 case AArch64::LDRBroX:
3696 case AArch64::LDURBi:
3697 case AArch64::LDRBui:
3698 return AArch64::LDRBroW;
3699 case AArch64::LDRBBroX:
3700 case AArch64::LDURBBi:
3701 case AArch64::LDRBBui:
3702 return AArch64::LDRBBroW;
3703 case AArch64::LDRSBXroX:
3704 case AArch64::LDURSBXi:
3705 case AArch64::LDRSBXui:
3706 return AArch64::LDRSBXroW;
3707 case AArch64::LDRSBWroX:
3708 case AArch64::LDURSBWi:
3709 case AArch64::LDRSBWui:
3710 return AArch64::LDRSBWroW;
3711 case AArch64::STRBroX:
3712 case AArch64::STURBi:
3713 case AArch64::STRBui:
3714 return AArch64::STRBroW;
3715 case AArch64::STRBBroX:
3716 case AArch64::STURBBi:
3717 case AArch64::STRBBui:
3718 return AArch64::STRBBroW;
3719 }
3720}
3721
3722 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3723 const ExtAddrMode &AM) const {
3724
3725 const DebugLoc &DL = MemI.getDebugLoc();
3726 MachineBasicBlock &MBB = *MemI.getParent();
3727 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3728
3729 if (AM.Form == ExtAddrMode::Formula::Basic) {
3730 if (AM.ScaledReg) {
3731 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3732 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3733 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3734 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3735 .addReg(MemI.getOperand(0).getReg(),
3736 MemI.mayLoad() ? RegState::Define : 0)
3737 .addReg(AM.BaseReg)
3738 .addReg(AM.ScaledReg)
3739 .addImm(0)
3740 .addImm(AM.Scale > 1)
3741 .setMemRefs(MemI.memoperands())
3742 .setMIFlags(MemI.getFlags());
3743 return B.getInstr();
3744 }
3745
3746 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3747 "Addressing mode not supported for folding");
3748
3749 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3750 unsigned Scale = 1;
3751 unsigned Opcode = MemI.getOpcode();
3752 if (isInt<9>(AM.Displacement))
3753 Opcode = unscaledOffsetOpcode(Opcode);
3754 else
3755 Opcode = scaledOffsetOpcode(Opcode, Scale);
3756
3757 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3758 .addReg(MemI.getOperand(0).getReg(),
3759 MemI.mayLoad() ? RegState::Define : 0)
3760 .addReg(AM.BaseReg)
3761 .addImm(AM.Displacement / Scale)
3762 .setMemRefs(MemI.memoperands())
3763 .setMIFlags(MemI.getFlags());
3764 return B.getInstr();
3765 }
3766
3767 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3768 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3769 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3770 assert(AM.ScaledReg && !AM.Displacement &&
3771 "Address offset can be a register or an immediate, but not both");
3772 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3773 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3774 // Make sure the offset register is in the correct register class.
3775 Register OffsetReg = AM.ScaledReg;
3776 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3777 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3778 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3779 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3780 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3781 }
3782 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3783 .addReg(MemI.getOperand(0).getReg(),
3784 MemI.mayLoad() ? RegState::Define : 0)
3785 .addReg(AM.BaseReg)
3786 .addReg(OffsetReg)
3787 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3788 .addImm(AM.Scale != 1)
3789 .setMemRefs(MemI.memoperands())
3790 .setMIFlags(MemI.getFlags());
3791
3792 return B.getInstr();
3793 }
3794
3796 "Function must not be called with an addressing mode it can't handle");
3797}
3798
3799 /// Return true if the opcode is a post-index ld/st instruction, which really
3800 /// loads from or stores to base+0.
3801static bool isPostIndexLdStOpcode(unsigned Opcode) {
3802 switch (Opcode) {
3803 default:
3804 return false;
3805 case AArch64::LD1Fourv16b_POST:
3806 case AArch64::LD1Fourv1d_POST:
3807 case AArch64::LD1Fourv2d_POST:
3808 case AArch64::LD1Fourv2s_POST:
3809 case AArch64::LD1Fourv4h_POST:
3810 case AArch64::LD1Fourv4s_POST:
3811 case AArch64::LD1Fourv8b_POST:
3812 case AArch64::LD1Fourv8h_POST:
3813 case AArch64::LD1Onev16b_POST:
3814 case AArch64::LD1Onev1d_POST:
3815 case AArch64::LD1Onev2d_POST:
3816 case AArch64::LD1Onev2s_POST:
3817 case AArch64::LD1Onev4h_POST:
3818 case AArch64::LD1Onev4s_POST:
3819 case AArch64::LD1Onev8b_POST:
3820 case AArch64::LD1Onev8h_POST:
3821 case AArch64::LD1Rv16b_POST:
3822 case AArch64::LD1Rv1d_POST:
3823 case AArch64::LD1Rv2d_POST:
3824 case AArch64::LD1Rv2s_POST:
3825 case AArch64::LD1Rv4h_POST:
3826 case AArch64::LD1Rv4s_POST:
3827 case AArch64::LD1Rv8b_POST:
3828 case AArch64::LD1Rv8h_POST:
3829 case AArch64::LD1Threev16b_POST:
3830 case AArch64::LD1Threev1d_POST:
3831 case AArch64::LD1Threev2d_POST:
3832 case AArch64::LD1Threev2s_POST:
3833 case AArch64::LD1Threev4h_POST:
3834 case AArch64::LD1Threev4s_POST:
3835 case AArch64::LD1Threev8b_POST:
3836 case AArch64::LD1Threev8h_POST:
3837 case AArch64::LD1Twov16b_POST:
3838 case AArch64::LD1Twov1d_POST:
3839 case AArch64::LD1Twov2d_POST:
3840 case AArch64::LD1Twov2s_POST:
3841 case AArch64::LD1Twov4h_POST:
3842 case AArch64::LD1Twov4s_POST:
3843 case AArch64::LD1Twov8b_POST:
3844 case AArch64::LD1Twov8h_POST:
3845 case AArch64::LD1i16_POST:
3846 case AArch64::LD1i32_POST:
3847 case AArch64::LD1i64_POST:
3848 case AArch64::LD1i8_POST:
3849 case AArch64::LD2Rv16b_POST:
3850 case AArch64::LD2Rv1d_POST:
3851 case AArch64::LD2Rv2d_POST:
3852 case AArch64::LD2Rv2s_POST:
3853 case AArch64::LD2Rv4h_POST:
3854 case AArch64::LD2Rv4s_POST:
3855 case AArch64::LD2Rv8b_POST:
3856 case AArch64::LD2Rv8h_POST:
3857 case AArch64::LD2Twov16b_POST:
3858 case AArch64::LD2Twov2d_POST:
3859 case AArch64::LD2Twov2s_POST:
3860 case AArch64::LD2Twov4h_POST:
3861 case AArch64::LD2Twov4s_POST:
3862 case AArch64::LD2Twov8b_POST:
3863 case AArch64::LD2Twov8h_POST:
3864 case AArch64::LD2i16_POST:
3865 case AArch64::LD2i32_POST:
3866 case AArch64::LD2i64_POST:
3867 case AArch64::LD2i8_POST:
3868 case AArch64::LD3Rv16b_POST:
3869 case AArch64::LD3Rv1d_POST:
3870 case AArch64::LD3Rv2d_POST:
3871 case AArch64::LD3Rv2s_POST:
3872 case AArch64::LD3Rv4h_POST:
3873 case AArch64::LD3Rv4s_POST:
3874 case AArch64::LD3Rv8b_POST:
3875 case AArch64::LD3Rv8h_POST:
3876 case AArch64::LD3Threev16b_POST:
3877 case AArch64::LD3Threev2d_POST:
3878 case AArch64::LD3Threev2s_POST:
3879 case AArch64::LD3Threev4h_POST:
3880 case AArch64::LD3Threev4s_POST:
3881 case AArch64::LD3Threev8b_POST:
3882 case AArch64::LD3Threev8h_POST:
3883 case AArch64::LD3i16_POST:
3884 case AArch64::LD3i32_POST:
3885 case AArch64::LD3i64_POST:
3886 case AArch64::LD3i8_POST:
3887 case AArch64::LD4Fourv16b_POST:
3888 case AArch64::LD4Fourv2d_POST:
3889 case AArch64::LD4Fourv2s_POST:
3890 case AArch64::LD4Fourv4h_POST:
3891 case AArch64::LD4Fourv4s_POST:
3892 case AArch64::LD4Fourv8b_POST:
3893 case AArch64::LD4Fourv8h_POST:
3894 case AArch64::LD4Rv16b_POST:
3895 case AArch64::LD4Rv1d_POST:
3896 case AArch64::LD4Rv2d_POST:
3897 case AArch64::LD4Rv2s_POST:
3898 case AArch64::LD4Rv4h_POST:
3899 case AArch64::LD4Rv4s_POST:
3900 case AArch64::LD4Rv8b_POST:
3901 case AArch64::LD4Rv8h_POST:
3902 case AArch64::LD4i16_POST:
3903 case AArch64::LD4i32_POST:
3904 case AArch64::LD4i64_POST:
3905 case AArch64::LD4i8_POST:
3906 case AArch64::LDAPRWpost:
3907 case AArch64::LDAPRXpost:
3908 case AArch64::LDIAPPWpost:
3909 case AArch64::LDIAPPXpost:
3910 case AArch64::LDPDpost:
3911 case AArch64::LDPQpost:
3912 case AArch64::LDPSWpost:
3913 case AArch64::LDPSpost:
3914 case AArch64::LDPWpost:
3915 case AArch64::LDPXpost:
3916 case AArch64::LDRBBpost:
3917 case AArch64::LDRBpost:
3918 case AArch64::LDRDpost:
3919 case AArch64::LDRHHpost:
3920 case AArch64::LDRHpost:
3921 case AArch64::LDRQpost:
3922 case AArch64::LDRSBWpost:
3923 case AArch64::LDRSBXpost:
3924 case AArch64::LDRSHWpost:
3925 case AArch64::LDRSHXpost:
3926 case AArch64::LDRSWpost:
3927 case AArch64::LDRSpost:
3928 case AArch64::LDRWpost:
3929 case AArch64::LDRXpost:
3930 case AArch64::ST1Fourv16b_POST:
3931 case AArch64::ST1Fourv1d_POST:
3932 case AArch64::ST1Fourv2d_POST:
3933 case AArch64::ST1Fourv2s_POST:
3934 case AArch64::ST1Fourv4h_POST:
3935 case AArch64::ST1Fourv4s_POST:
3936 case AArch64::ST1Fourv8b_POST:
3937 case AArch64::ST1Fourv8h_POST:
3938 case AArch64::ST1Onev16b_POST:
3939 case AArch64::ST1Onev1d_POST:
3940 case AArch64::ST1Onev2d_POST:
3941 case AArch64::ST1Onev2s_POST:
3942 case AArch64::ST1Onev4h_POST:
3943 case AArch64::ST1Onev4s_POST:
3944 case AArch64::ST1Onev8b_POST:
3945 case AArch64::ST1Onev8h_POST:
3946 case AArch64::ST1Threev16b_POST:
3947 case AArch64::ST1Threev1d_POST:
3948 case AArch64::ST1Threev2d_POST:
3949 case AArch64::ST1Threev2s_POST:
3950 case AArch64::ST1Threev4h_POST:
3951 case AArch64::ST1Threev4s_POST:
3952 case AArch64::ST1Threev8b_POST:
3953 case AArch64::ST1Threev8h_POST:
3954 case AArch64::ST1Twov16b_POST:
3955 case AArch64::ST1Twov1d_POST:
3956 case AArch64::ST1Twov2d_POST:
3957 case AArch64::ST1Twov2s_POST:
3958 case AArch64::ST1Twov4h_POST:
3959 case AArch64::ST1Twov4s_POST:
3960 case AArch64::ST1Twov8b_POST:
3961 case AArch64::ST1Twov8h_POST:
3962 case AArch64::ST1i16_POST:
3963 case AArch64::ST1i32_POST:
3964 case AArch64::ST1i64_POST:
3965 case AArch64::ST1i8_POST:
3966 case AArch64::ST2GPostIndex:
3967 case AArch64::ST2Twov16b_POST:
3968 case AArch64::ST2Twov2d_POST:
3969 case AArch64::ST2Twov2s_POST:
3970 case AArch64::ST2Twov4h_POST:
3971 case AArch64::ST2Twov4s_POST:
3972 case AArch64::ST2Twov8b_POST:
3973 case AArch64::ST2Twov8h_POST:
3974 case AArch64::ST2i16_POST:
3975 case AArch64::ST2i32_POST:
3976 case AArch64::ST2i64_POST:
3977 case AArch64::ST2i8_POST:
3978 case AArch64::ST3Threev16b_POST:
3979 case AArch64::ST3Threev2d_POST:
3980 case AArch64::ST3Threev2s_POST:
3981 case AArch64::ST3Threev4h_POST:
3982 case AArch64::ST3Threev4s_POST:
3983 case AArch64::ST3Threev8b_POST:
3984 case AArch64::ST3Threev8h_POST:
3985 case AArch64::ST3i16_POST:
3986 case AArch64::ST3i32_POST:
3987 case AArch64::ST3i64_POST:
3988 case AArch64::ST3i8_POST:
3989 case AArch64::ST4Fourv16b_POST:
3990 case AArch64::ST4Fourv2d_POST:
3991 case AArch64::ST4Fourv2s_POST:
3992 case AArch64::ST4Fourv4h_POST:
3993 case AArch64::ST4Fourv4s_POST:
3994 case AArch64::ST4Fourv8b_POST:
3995 case AArch64::ST4Fourv8h_POST:
3996 case AArch64::ST4i16_POST:
3997 case AArch64::ST4i32_POST:
3998 case AArch64::ST4i64_POST:
3999 case AArch64::ST4i8_POST:
4000 case AArch64::STGPostIndex:
4001 case AArch64::STGPpost:
4002 case AArch64::STPDpost:
4003 case AArch64::STPQpost:
4004 case AArch64::STPSpost:
4005 case AArch64::STPWpost:
4006 case AArch64::STPXpost:
4007 case AArch64::STRBBpost:
4008 case AArch64::STRBpost:
4009 case AArch64::STRDpost:
4010 case AArch64::STRHHpost:
4011 case AArch64::STRHpost:
4012 case AArch64::STRQpost:
4013 case AArch64::STRSpost:
4014 case AArch64::STRWpost:
4015 case AArch64::STRXpost:
4016 case AArch64::STZ2GPostIndex:
4017 case AArch64::STZGPostIndex:
4018 return true;
4019 }
4020}
4021
4022 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4023 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4024 bool &OffsetIsScalable, TypeSize &Width,
4025 const TargetRegisterInfo *TRI) const {
4026 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4027 // Handle only loads/stores with base register followed by immediate offset.
4028 if (LdSt.getNumExplicitOperands() == 3) {
4029 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4030 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4031 !LdSt.getOperand(2).isImm())
4032 return false;
4033 } else if (LdSt.getNumExplicitOperands() == 4) {
4034 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4035 if (!LdSt.getOperand(1).isReg() ||
4036 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4037 !LdSt.getOperand(3).isImm())
4038 return false;
4039 } else
4040 return false;
4041
4042 // Get the scaling factor for the instruction and set the width for the
4043 // instruction.
4044 TypeSize Scale(0U, false);
4045 int64_t Dummy1, Dummy2;
4046
4047 // If this returns false, then it's an instruction we don't want to handle.
4048 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4049 return false;
4050
4051 // Compute the offset. The offset is the immediate operand multiplied by the
4052 // scaling factor; unscaled instructions have a scaling factor of 1.
4053 // Post-index instructions are a special case and have an offset of 0.
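// For example (illustrative): an LDRXui `ldr x1, [x0, #16]` encodes the
// immediate 2 and has Scale == 8, so Offset = 2 * 8 = 16 bytes.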
4054 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4055 BaseOp = &LdSt.getOperand(2);
4056 Offset = 0;
4057 } else if (LdSt.getNumExplicitOperands() == 3) {
4058 BaseOp = &LdSt.getOperand(1);
4059 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4060 } else {
4061 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4062 BaseOp = &LdSt.getOperand(2);
4063 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4064 }
4065 OffsetIsScalable = Scale.isScalable();
4066
4067 return BaseOp->isReg() || BaseOp->isFI();
4068}
4069
4070 MachineOperand &
4071 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
4072 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4073 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4074 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4075 return OfsOp;
4076}
4077
4078bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4079 TypeSize &Width, int64_t &MinOffset,
4080 int64_t &MaxOffset) {
4081 switch (Opcode) {
4082 // Not a memory operation or something we want to handle.
4083 default:
4084 Scale = TypeSize::getFixed(0);
4085 Width = TypeSize::getFixed(0);
4086 MinOffset = MaxOffset = 0;
4087 return false;
4088 // LDR / STR
4089 case AArch64::LDRQui:
4090 case AArch64::STRQui:
4091 Scale = TypeSize::getFixed(16);
4092 Width = TypeSize::getFixed(16);
4093 MinOffset = 0;
4094 MaxOffset = 4095;
4095 break;
4096 case AArch64::LDRXui:
4097 case AArch64::LDRDui:
4098 case AArch64::STRXui:
4099 case AArch64::STRDui:
4100 case AArch64::PRFMui:
4101 Scale = TypeSize::getFixed(8);
4102 Width = TypeSize::getFixed(8);
4103 MinOffset = 0;
4104 MaxOffset = 4095;
4105 break;
4106 case AArch64::LDRWui:
4107 case AArch64::LDRSui:
4108 case AArch64::LDRSWui:
4109 case AArch64::STRWui:
4110 case AArch64::STRSui:
4111 Scale = TypeSize::getFixed(4);
4112 Width = TypeSize::getFixed(4);
4113 MinOffset = 0;
4114 MaxOffset = 4095;
4115 break;
4116 case AArch64::LDRHui:
4117 case AArch64::LDRHHui:
4118 case AArch64::LDRSHWui:
4119 case AArch64::LDRSHXui:
4120 case AArch64::STRHui:
4121 case AArch64::STRHHui:
4122 Scale = TypeSize::getFixed(2);
4123 Width = TypeSize::getFixed(2);
4124 MinOffset = 0;
4125 MaxOffset = 4095;
4126 break;
4127 case AArch64::LDRBui:
4128 case AArch64::LDRBBui:
4129 case AArch64::LDRSBWui:
4130 case AArch64::LDRSBXui:
4131 case AArch64::STRBui:
4132 case AArch64::STRBBui:
4133 Scale = TypeSize::getFixed(1);
4134 Width = TypeSize::getFixed(1);
4135 MinOffset = 0;
4136 MaxOffset = 4095;
4137 break;
4138 // post/pre inc
4139 case AArch64::STRQpre:
4140 case AArch64::LDRQpost:
4141 Scale = TypeSize::getFixed(1);
4142 Width = TypeSize::getFixed(16);
4143 MinOffset = -256;
4144 MaxOffset = 255;
4145 break;
4146 case AArch64::LDRDpost:
4147 case AArch64::LDRDpre:
4148 case AArch64::LDRXpost:
4149 case AArch64::LDRXpre:
4150 case AArch64::STRDpost:
4151 case AArch64::STRDpre:
4152 case AArch64::STRXpost:
4153 case AArch64::STRXpre:
4154 Scale = TypeSize::getFixed(1);
4155 Width = TypeSize::getFixed(8);
4156 MinOffset = -256;
4157 MaxOffset = 255;
4158 break;
4159 case AArch64::STRWpost:
4160 case AArch64::STRWpre:
4161 case AArch64::LDRWpost:
4162 case AArch64::LDRWpre:
4163 case AArch64::STRSpost:
4164 case AArch64::STRSpre:
4165 case AArch64::LDRSpost:
4166 case AArch64::LDRSpre:
4167 Scale = TypeSize::getFixed(1);
4168 Width = TypeSize::getFixed(4);
4169 MinOffset = -256;
4170 MaxOffset = 255;
4171 break;
4172 case AArch64::LDRHpost:
4173 case AArch64::LDRHpre:
4174 case AArch64::STRHpost:
4175 case AArch64::STRHpre:
4176 case AArch64::LDRHHpost:
4177 case AArch64::LDRHHpre:
4178 case AArch64::STRHHpost:
4179 case AArch64::STRHHpre:
4180 Scale = TypeSize::getFixed(1);
4181 Width = TypeSize::getFixed(2);
4182 MinOffset = -256;
4183 MaxOffset = 255;
4184 break;
4185 case AArch64::LDRBpost:
4186 case AArch64::LDRBpre:
4187 case AArch64::STRBpost:
4188 case AArch64::STRBpre:
4189 case AArch64::LDRBBpost:
4190 case AArch64::LDRBBpre:
4191 case AArch64::STRBBpost:
4192 case AArch64::STRBBpre:
4193 Scale = TypeSize::getFixed(1);
4194 Width = TypeSize::getFixed(1);
4195 MinOffset = -256;
4196 MaxOffset = 255;
4197 break;
4198 // Unscaled
4199 case AArch64::LDURQi:
4200 case AArch64::STURQi:
4201 Scale = TypeSize::getFixed(1);
4202 Width = TypeSize::getFixed(16);
4203 MinOffset = -256;
4204 MaxOffset = 255;
4205 break;
4206 case AArch64::LDURXi:
4207 case AArch64::LDURDi:
4208 case AArch64::LDAPURXi:
4209 case AArch64::STURXi:
4210 case AArch64::STURDi:
4211 case AArch64::STLURXi:
4212 case AArch64::PRFUMi:
4213 Scale = TypeSize::getFixed(1);
4214 Width = TypeSize::getFixed(8);
4215 MinOffset = -256;
4216 MaxOffset = 255;
4217 break;
4218 case AArch64::LDURWi:
4219 case AArch64::LDURSi:
4220 case AArch64::LDURSWi:
4221 case AArch64::LDAPURi:
4222 case AArch64::LDAPURSWi:
4223 case AArch64::STURWi:
4224 case AArch64::STURSi:
4225 case AArch64::STLURWi:
4226 Scale = TypeSize::getFixed(1);
4227 Width = TypeSize::getFixed(4);
4228 MinOffset = -256;
4229 MaxOffset = 255;
4230 break;
4231 case AArch64::LDURHi:
4232 case AArch64::LDURHHi:
4233 case AArch64::LDURSHXi:
4234 case AArch64::LDURSHWi:
4235 case AArch64::LDAPURHi:
4236 case AArch64::LDAPURSHWi:
4237 case AArch64::LDAPURSHXi:
4238 case AArch64::STURHi:
4239 case AArch64::STURHHi:
4240 case AArch64::STLURHi:
4241 Scale = TypeSize::getFixed(1);
4242 Width = TypeSize::getFixed(2);
4243 MinOffset = -256;
4244 MaxOffset = 255;
4245 break;
4246 case AArch64::LDURBi:
4247 case AArch64::LDURBBi:
4248 case AArch64::LDURSBXi:
4249 case AArch64::LDURSBWi:
4250 case AArch64::LDAPURBi:
4251 case AArch64::LDAPURSBWi:
4252 case AArch64::LDAPURSBXi:
4253 case AArch64::STURBi:
4254 case AArch64::STURBBi:
4255 case AArch64::STLURBi:
4256 Scale = TypeSize::getFixed(1);
4257 Width = TypeSize::getFixed(1);
4258 MinOffset = -256;
4259 MaxOffset = 255;
4260 break;
4261 // LDP / STP (including pre/post inc)
4262 case AArch64::LDPQi:
4263 case AArch64::LDNPQi:
4264 case AArch64::STPQi:
4265 case AArch64::STNPQi:
4266 case AArch64::LDPQpost:
4267 case AArch64::LDPQpre:
4268 case AArch64::STPQpost:
4269 case AArch64::STPQpre:
4270 Scale = TypeSize::getFixed(16);
4271 Width = TypeSize::getFixed(16 * 2);
4272 MinOffset = -64;
4273 MaxOffset = 63;
4274 break;
4275 case AArch64::LDPXi:
4276 case AArch64::LDPDi:
4277 case AArch64::LDNPXi:
4278 case AArch64::LDNPDi:
4279 case AArch64::STPXi:
4280 case AArch64::STPDi:
4281 case AArch64::STNPXi:
4282 case AArch64::STNPDi:
4283 case AArch64::LDPDpost:
4284 case AArch64::LDPDpre:
4285 case AArch64::LDPXpost:
4286 case AArch64::LDPXpre:
4287 case AArch64::STPDpost:
4288 case AArch64::STPDpre:
4289 case AArch64::STPXpost:
4290 case AArch64::STPXpre:
4291 Scale = TypeSize::getFixed(8);
4292 Width = TypeSize::getFixed(8 * 2);
4293 MinOffset = -64;
4294 MaxOffset = 63;
4295 break;
4296 case AArch64::LDPWi:
4297 case AArch64::LDPSi:
4298 case AArch64::LDNPWi:
4299 case AArch64::LDNPSi:
4300 case AArch64::STPWi:
4301 case AArch64::STPSi:
4302 case AArch64::STNPWi:
4303 case AArch64::STNPSi:
4304 case AArch64::LDPSpost:
4305 case AArch64::LDPSpre:
4306 case AArch64::LDPWpost:
4307 case AArch64::LDPWpre:
4308 case AArch64::STPSpost:
4309 case AArch64::STPSpre:
4310 case AArch64::STPWpost:
4311 case AArch64::STPWpre:
4312 Scale = TypeSize::getFixed(4);
4313 Width = TypeSize::getFixed(4 * 2);
4314 MinOffset = -64;
4315 MaxOffset = 63;
4316 break;
4317 case AArch64::StoreSwiftAsyncContext:
4318 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4319 Scale = TypeSize::getFixed(1);
4320 Width = TypeSize::getFixed(8);
4321 MinOffset = 0;
4322 MaxOffset = 4095;
4323 break;
4324 case AArch64::ADDG:
4325 Scale = TypeSize::getFixed(16);
4326 Width = TypeSize::getFixed(0);
4327 MinOffset = 0;
4328 MaxOffset = 63;
4329 break;
4330 case AArch64::TAGPstack:
4331 Scale = TypeSize::getFixed(16);
4332 Width = TypeSize::getFixed(0);
4333 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4334 // of 63 (not 64!).
4335 MinOffset = -63;
4336 MaxOffset = 63;
4337 break;
4338 case AArch64::LDG:
4339 case AArch64::STGi:
4340 case AArch64::STGPreIndex:
4341 case AArch64::STGPostIndex:
4342 case AArch64::STZGi:
4343 case AArch64::STZGPreIndex:
4344 case AArch64::STZGPostIndex:
4345 Scale = TypeSize::getFixed(16);
4346 Width = TypeSize::getFixed(16);
4347 MinOffset = -256;
4348 MaxOffset = 255;
4349 break;
4350 // SVE
4351 case AArch64::STR_ZZZZXI:
4352 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4353 case AArch64::LDR_ZZZZXI:
4354 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4355 Scale = TypeSize::getScalable(16);
4356 Width = TypeSize::getScalable(16 * 4);
4357 MinOffset = -256;
4358 MaxOffset = 252;
4359 break;
4360 case AArch64::STR_ZZZXI:
4361 case AArch64::LDR_ZZZXI:
4362 Scale = TypeSize::getScalable(16);
4363 Width = TypeSize::getScalable(16 * 3);
4364 MinOffset = -256;
4365 MaxOffset = 253;
4366 break;
4367 case AArch64::STR_ZZXI:
4368 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4369 case AArch64::LDR_ZZXI:
4370 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4371 Scale = TypeSize::getScalable(16);
4372 Width = TypeSize::getScalable(16 * 2);
4373 MinOffset = -256;
4374 MaxOffset = 254;
4375 break;
4376 case AArch64::LDR_PXI:
4377 case AArch64::STR_PXI:
4378 Scale = TypeSize::getScalable(2);
4379 Width = TypeSize::getScalable(2);
4380 MinOffset = -256;
4381 MaxOffset = 255;
4382 break;
4383 case AArch64::LDR_PPXI:
4384 case AArch64::STR_PPXI:
4385 Scale = TypeSize::getScalable(2);
4386 Width = TypeSize::getScalable(2 * 2);
4387 MinOffset = -256;
4388 MaxOffset = 254;
4389 break;
4390 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4391 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4392 case AArch64::LDR_ZXI:
4393 case AArch64::STR_ZXI:
4394 Scale = TypeSize::getScalable(16);
4395 Width = TypeSize::getScalable(16);
4396 MinOffset = -256;
4397 MaxOffset = 255;
4398 break;
4399 case AArch64::LD1B_IMM:
4400 case AArch64::LD1H_IMM:
4401 case AArch64::LD1W_IMM:
4402 case AArch64::LD1D_IMM:
4403 case AArch64::LDNT1B_ZRI:
4404 case AArch64::LDNT1H_ZRI:
4405 case AArch64::LDNT1W_ZRI:
4406 case AArch64::LDNT1D_ZRI:
4407 case AArch64::ST1B_IMM:
4408 case AArch64::ST1H_IMM:
4409 case AArch64::ST1W_IMM:
4410 case AArch64::ST1D_IMM:
4411 case AArch64::STNT1B_ZRI:
4412 case AArch64::STNT1H_ZRI:
4413 case AArch64::STNT1W_ZRI:
4414 case AArch64::STNT1D_ZRI:
4415 case AArch64::LDNF1B_IMM:
4416 case AArch64::LDNF1H_IMM:
4417 case AArch64::LDNF1W_IMM:
4418 case AArch64::LDNF1D_IMM:
4419 // A full vector's worth of data
4420 // Width = mbytes * elements
4421 Scale = TypeSize::getScalable(16);
4422 Width = TypeSize::getScalable(16);
4423 MinOffset = -8;
4424 MaxOffset = 7;
4425 break;
4426 case AArch64::LD2B_IMM:
4427 case AArch64::LD2H_IMM:
4428 case AArch64::LD2W_IMM:
4429 case AArch64::LD2D_IMM:
4430 case AArch64::ST2B_IMM:
4431 case AArch64::ST2H_IMM:
4432 case AArch64::ST2W_IMM:
4433 case AArch64::ST2D_IMM:
4434 Scale = TypeSize::getScalable(32);
4435 Width = TypeSize::getScalable(16 * 2);
4436 MinOffset = -8;
4437 MaxOffset = 7;
4438 break;
4439 case AArch64::LD3B_IMM:
4440 case AArch64::LD3H_IMM:
4441 case AArch64::LD3W_IMM:
4442 case AArch64::LD3D_IMM:
4443 case AArch64::ST3B_IMM:
4444 case AArch64::ST3H_IMM:
4445 case AArch64::ST3W_IMM:
4446 case AArch64::ST3D_IMM:
4447 Scale = TypeSize::getScalable(48);
4448 Width = TypeSize::getScalable(16 * 3);
4449 MinOffset = -8;
4450 MaxOffset = 7;
4451 break;
4452 case AArch64::LD4B_IMM:
4453 case AArch64::LD4H_IMM:
4454 case AArch64::LD4W_IMM:
4455 case AArch64::LD4D_IMM:
4456 case AArch64::ST4B_IMM:
4457 case AArch64::ST4H_IMM:
4458 case AArch64::ST4W_IMM:
4459 case AArch64::ST4D_IMM:
4460 Scale = TypeSize::getScalable(64);
4461 Width = TypeSize::getScalable(16 * 4);
4462 MinOffset = -8;
4463 MaxOffset = 7;
4464 break;
4465 case AArch64::LD1B_H_IMM:
4466 case AArch64::LD1SB_H_IMM:
4467 case AArch64::LD1H_S_IMM:
4468 case AArch64::LD1SH_S_IMM:
4469 case AArch64::LD1W_D_IMM:
4470 case AArch64::LD1SW_D_IMM:
4471 case AArch64::ST1B_H_IMM:
4472 case AArch64::ST1H_S_IMM:
4473 case AArch64::ST1W_D_IMM:
4474 case AArch64::LDNF1B_H_IMM:
4475 case AArch64::LDNF1SB_H_IMM:
4476 case AArch64::LDNF1H_S_IMM:
4477 case AArch64::LDNF1SH_S_IMM:
4478 case AArch64::LDNF1W_D_IMM:
4479 case AArch64::LDNF1SW_D_IMM:
4480 // A half vector's worth of data
4481 // Width = mbytes * elements
4482 Scale = TypeSize::getScalable(8);
4483 Width = TypeSize::getScalable(8);
4484 MinOffset = -8;
4485 MaxOffset = 7;
4486 break;
4487 case AArch64::LD1B_S_IMM:
4488 case AArch64::LD1SB_S_IMM:
4489 case AArch64::LD1H_D_IMM:
4490 case AArch64::LD1SH_D_IMM:
4491 case AArch64::ST1B_S_IMM:
4492 case AArch64::ST1H_D_IMM:
4493 case AArch64::LDNF1B_S_IMM:
4494 case AArch64::LDNF1SB_S_IMM:
4495 case AArch64::LDNF1H_D_IMM:
4496 case AArch64::LDNF1SH_D_IMM:
4497 // A quarter vector's worth of data
4498 // Width = mbytes * elements
4499 Scale = TypeSize::getScalable(4);
4500 Width = TypeSize::getScalable(4);
4501 MinOffset = -8;
4502 MaxOffset = 7;
4503 break;
4504 case AArch64::LD1B_D_IMM:
4505 case AArch64::LD1SB_D_IMM:
4506 case AArch64::ST1B_D_IMM:
4507 case AArch64::LDNF1B_D_IMM:
4508 case AArch64::LDNF1SB_D_IMM:
4509 // An eighth vector's worth of data
4510 // Width = mbytes * elements
4511 Scale = TypeSize::getScalable(2);
4512 Width = TypeSize::getScalable(2);
4513 MinOffset = -8;
4514 MaxOffset = 7;
4515 break;
4516 case AArch64::ST2Gi:
4517 case AArch64::ST2GPreIndex:
4518 case AArch64::ST2GPostIndex:
4519 case AArch64::STZ2Gi:
4520 case AArch64::STZ2GPreIndex:
4521 case AArch64::STZ2GPostIndex:
4522 Scale = TypeSize::getFixed(16);
4523 Width = TypeSize::getFixed(32);
4524 MinOffset = -256;
4525 MaxOffset = 255;
4526 break;
4527 case AArch64::STGPi:
4528 case AArch64::STGPpost:
4529 case AArch64::STGPpre:
4530 Scale = TypeSize::getFixed(16);
4531 Width = TypeSize::getFixed(16);
4532 MinOffset = -64;
4533 MaxOffset = 63;
4534 break;
4535 case AArch64::LD1RB_IMM:
4536 case AArch64::LD1RB_H_IMM:
4537 case AArch64::LD1RB_S_IMM:
4538 case AArch64::LD1RB_D_IMM:
4539 case AArch64::LD1RSB_H_IMM:
4540 case AArch64::LD1RSB_S_IMM:
4541 case AArch64::LD1RSB_D_IMM:
4542 Scale = TypeSize::getFixed(1);
4543 Width = TypeSize::getFixed(1);
4544 MinOffset = 0;
4545 MaxOffset = 63;
4546 break;
4547 case AArch64::LD1RH_IMM:
4548 case AArch64::LD1RH_S_IMM:
4549 case AArch64::LD1RH_D_IMM:
4550 case AArch64::LD1RSH_S_IMM:
4551 case AArch64::LD1RSH_D_IMM:
4552 Scale = TypeSize::getFixed(2);
4553 Width = TypeSize::getFixed(2);
4554 MinOffset = 0;
4555 MaxOffset = 63;
4556 break;
4557 case AArch64::LD1RW_IMM:
4558 case AArch64::LD1RW_D_IMM:
4559 case AArch64::LD1RSW_IMM:
4560 Scale = TypeSize::getFixed(4);
4561 Width = TypeSize::getFixed(4);
4562 MinOffset = 0;
4563 MaxOffset = 63;
4564 break;
4565 case AArch64::LD1RD_IMM:
4566 Scale = TypeSize::getFixed(8);
4567 Width = TypeSize::getFixed(8);
4568 MinOffset = 0;
4569 MaxOffset = 63;
4570 break;
4571 }
4572
4573 return true;
4574}
4575
4576// Scaling factor for unscaled load or store.
4577 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4578 switch (Opc) {
4579 default:
4580 llvm_unreachable("Opcode has unknown scale!");
4581 case AArch64::LDRBBui:
4582 case AArch64::LDURBBi:
4583 case AArch64::LDRSBWui:
4584 case AArch64::LDURSBWi:
4585 case AArch64::STRBBui:
4586 case AArch64::STURBBi:
4587 return 1;
4588 case AArch64::LDRHHui:
4589 case AArch64::LDURHHi:
4590 case AArch64::LDRSHWui:
4591 case AArch64::LDURSHWi:
4592 case AArch64::STRHHui:
4593 case AArch64::STURHHi:
4594 return 2;
4595 case AArch64::LDRSui:
4596 case AArch64::LDURSi:
4597 case AArch64::LDRSpre:
4598 case AArch64::LDRSWui:
4599 case AArch64::LDURSWi:
4600 case AArch64::LDRSWpre:
4601 case AArch64::LDRWpre:
4602 case AArch64::LDRWui:
4603 case AArch64::LDURWi:
4604 case AArch64::STRSui:
4605 case AArch64::STURSi:
4606 case AArch64::STRSpre:
4607 case AArch64::STRWui:
4608 case AArch64::STURWi:
4609 case AArch64::STRWpre:
4610 case AArch64::LDPSi:
4611 case AArch64::LDPSWi:
4612 case AArch64::LDPWi:
4613 case AArch64::STPSi:
4614 case AArch64::STPWi:
4615 return 4;
4616 case AArch64::LDRDui:
4617 case AArch64::LDURDi:
4618 case AArch64::LDRDpre:
4619 case AArch64::LDRXui:
4620 case AArch64::LDURXi:
4621 case AArch64::LDRXpre:
4622 case AArch64::STRDui:
4623 case AArch64::STURDi:
4624 case AArch64::STRDpre:
4625 case AArch64::STRXui:
4626 case AArch64::STURXi:
4627 case AArch64::STRXpre:
4628 case AArch64::LDPDi:
4629 case AArch64::LDPXi:
4630 case AArch64::STPDi:
4631 case AArch64::STPXi:
4632 return 8;
4633 case AArch64::LDRQui:
4634 case AArch64::LDURQi:
4635 case AArch64::STRQui:
4636 case AArch64::STURQi:
4637 case AArch64::STRQpre:
4638 case AArch64::LDPQi:
4639 case AArch64::LDRQpre:
4640 case AArch64::STPQi:
4641 case AArch64::STGi:
4642 case AArch64::STZGi:
4643 case AArch64::ST2Gi:
4644 case AArch64::STZ2Gi:
4645 case AArch64::STGPi:
4646 return 16;
4647 }
4648}
4649
4650 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4651 switch (MI.getOpcode()) {
4652 default:
4653 return false;
4654 case AArch64::LDRWpre:
4655 case AArch64::LDRXpre:
4656 case AArch64::LDRSWpre:
4657 case AArch64::LDRSpre:
4658 case AArch64::LDRDpre:
4659 case AArch64::LDRQpre:
4660 return true;
4661 }
4662}
4663
4664 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4665 switch (MI.getOpcode()) {
4666 default:
4667 return false;
4668 case AArch64::STRWpre:
4669 case AArch64::STRXpre:
4670 case AArch64::STRSpre:
4671 case AArch64::STRDpre:
4672 case AArch64::STRQpre:
4673 return true;
4674 }
4675}
4676
4677 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4678 return isPreLd(MI) || isPreSt(MI);
4679}
4680
4681 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4682 switch (MI.getOpcode()) {
4683 default:
4684 return false;
4685 case AArch64::LDPSi:
4686 case AArch64::LDPSWi:
4687 case AArch64::LDPDi:
4688 case AArch64::LDPQi:
4689 case AArch64::LDPWi:
4690 case AArch64::LDPXi:
4691 case AArch64::STPSi:
4692 case AArch64::STPDi:
4693 case AArch64::STPQi:
4694 case AArch64::STPWi:
4695 case AArch64::STPXi:
4696 case AArch64::STGPi:
4697 return true;
4698 }
4699}
4700
4701 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4702 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4703 unsigned Idx =
4704 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4705 : 1;
4706 return MI.getOperand(Idx);
4707}
4708
4709const MachineOperand &
4710 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4711 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4712 unsigned Idx =
4713 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4714 : 2;
4715 return MI.getOperand(Idx);
4716}
4717
4718const MachineOperand &
4719 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4720 switch (MI.getOpcode()) {
4721 default:
4722 llvm_unreachable("Unexpected opcode");
4723 case AArch64::LDRBroX:
4724 case AArch64::LDRBBroX:
4725 case AArch64::LDRSBXroX:
4726 case AArch64::LDRSBWroX:
4727 case AArch64::LDRHroX:
4728 case AArch64::LDRHHroX:
4729 case AArch64::LDRSHXroX:
4730 case AArch64::LDRSHWroX:
4731 case AArch64::LDRWroX:
4732 case AArch64::LDRSroX:
4733 case AArch64::LDRSWroX:
4734 case AArch64::LDRDroX:
4735 case AArch64::LDRXroX:
4736 case AArch64::LDRQroX:
4737 return MI.getOperand(4);
4738 }
4739}
4740
4741 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4742 Register Reg) {
4743 if (MI.getParent() == nullptr)
4744 return nullptr;
4745 const MachineFunction *MF = MI.getParent()->getParent();
4746 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4747}
4748
4749 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4750 auto IsHFPR = [&](const MachineOperand &Op) {
4751 if (!Op.isReg())
4752 return false;
4753 auto Reg = Op.getReg();
4754 if (Reg.isPhysical())
4755 return AArch64::FPR16RegClass.contains(Reg);
4756 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4757 return TRC == &AArch64::FPR16RegClass ||
4758 TRC == &AArch64::FPR16_loRegClass;
4759 };
4760 return llvm::any_of(MI.operands(), IsHFPR);
4761}
4762
4763 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4764 auto IsQFPR = [&](const MachineOperand &Op) {
4765 if (!Op.isReg())
4766 return false;
4767 auto Reg = Op.getReg();
4768 if (Reg.isPhysical())
4769 return AArch64::FPR128RegClass.contains(Reg);
4770 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4771 return TRC == &AArch64::FPR128RegClass ||
4772 TRC == &AArch64::FPR128_loRegClass;
4773 };
4774 return llvm::any_of(MI.operands(), IsQFPR);
4775}
4776
4777 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4778 switch (MI.getOpcode()) {
4779 case AArch64::BRK:
4780 case AArch64::HLT:
4781 case AArch64::PACIASP:
4782 case AArch64::PACIBSP:
4783 // Implicit BTI behavior.
4784 return true;
4785 case AArch64::PAUTH_PROLOGUE:
4786 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4787 return true;
4788 case AArch64::HINT: {
4789 unsigned Imm = MI.getOperand(0).getImm();
4790 // Explicit BTI instruction.
4791 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4792 return true;
4793 // PACI(A|B)SP instructions.
4794 if (Imm == 25 || Imm == 27)
4795 return true;
4796 return false;
4797 }
4798 default:
4799 return false;
4800 }
4801}
4802
4803 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4804 if (Reg == 0)
4805 return false;
4806 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4807 return AArch64::FPR128RegClass.contains(Reg) ||
4808 AArch64::FPR64RegClass.contains(Reg) ||
4809 AArch64::FPR32RegClass.contains(Reg) ||
4810 AArch64::FPR16RegClass.contains(Reg) ||
4811 AArch64::FPR8RegClass.contains(Reg);
4812}
4813
4814 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4815 auto IsFPR = [&](const MachineOperand &Op) {
4816 if (!Op.isReg())
4817 return false;
4818 auto Reg = Op.getReg();
4819 if (Reg.isPhysical())
4820 return isFpOrNEON(Reg);
4821
4822 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4823 return TRC == &AArch64::FPR128RegClass ||
4824 TRC == &AArch64::FPR128_loRegClass ||
4825 TRC == &AArch64::FPR64RegClass ||
4826 TRC == &AArch64::FPR64_loRegClass ||
4827 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4828 TRC == &AArch64::FPR8RegClass;
4829 };
4830 return llvm::any_of(MI.operands(), IsFPR);
4831}
4832
4833// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4834// scaled.
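// For example (illustrative): an STURXi byte offset of 16 divides evenly by
// getMemScale(STURXi) == 8 and becomes the element offset 2, whereas a byte
// offset of 12 is not a multiple of 8 and cannot be scaled.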
4835static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4836 int Scale = AArch64InstrInfo::getMemScale(Opc);
4837
4838 // If the byte-offset isn't a multiple of the stride, we can't scale this
4839 // offset.
4840 if (Offset % Scale != 0)
4841 return false;
4842
4843 // Convert the byte-offset used by unscaled into an "element" offset used
4844 // by the scaled pair load/store instructions.
4845 Offset /= Scale;
4846 return true;
4847}
4848
4849static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4850 if (FirstOpc == SecondOpc)
4851 return true;
4852 // We can also pair sign-ext and zero-ext instructions.
4853 switch (FirstOpc) {
4854 default:
4855 return false;
4856 case AArch64::STRSui:
4857 case AArch64::STURSi:
4858 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4859 case AArch64::STRDui:
4860 case AArch64::STURDi:
4861 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4862 case AArch64::STRQui:
4863 case AArch64::STURQi:
4864 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4865 case AArch64::STRWui:
4866 case AArch64::STURWi:
4867 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4868 case AArch64::STRXui:
4869 case AArch64::STURXi:
4870 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4871 case AArch64::LDRSui:
4872 case AArch64::LDURSi:
4873 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4874 case AArch64::LDRDui:
4875 case AArch64::LDURDi:
4876 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4877 case AArch64::LDRQui:
4878 case AArch64::LDURQi:
4879 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4880 case AArch64::LDRWui:
4881 case AArch64::LDURWi:
4882 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4883 case AArch64::LDRSWui:
4884 case AArch64::LDURSWi:
4885 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4886 case AArch64::LDRXui:
4887 case AArch64::LDURXi:
4888 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4889 }
4890 // These instructions can't be paired based on their opcodes.
4891 return false;
4892}
4893
4894static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4895 int64_t Offset1, unsigned Opcode1, int FI2,
4896 int64_t Offset2, unsigned Opcode2) {
4897 // Accesses through fixed stack object frame indices may access a different
4898 // fixed stack slot. Check that the object offsets + offsets match.
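// For example (illustrative): two 8-byte fixed slots at object offsets 0 and 8
// with instruction offsets of 0 scale to adjacent element offsets 0 and 1 and
// may be clustered; slots at object offsets 0 and 24 (element offsets 0 and 3)
// may not.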
4899 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4900 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4901 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4902 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4903 // Convert to scaled object offsets.
4904 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4905 if (ObjectOffset1 % Scale1 != 0)
4906 return false;
4907 ObjectOffset1 /= Scale1;
4908 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4909 if (ObjectOffset2 % Scale2 != 0)
4910 return false;
4911 ObjectOffset2 /= Scale2;
4912 ObjectOffset1 += Offset1;
4913 ObjectOffset2 += Offset2;
4914 return ObjectOffset1 + 1 == ObjectOffset2;
4915 }
4916
4917 return FI1 == FI2;
4918}
4919
4920/// Detect opportunities for ldp/stp formation.
4921///
4922/// Only called for LdSt for which getMemOperandWithOffset returns true.
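/// For example (illustrative): `ldr x0, [x8, #8]` and `ldr x1, [x8, #16]`
/// (LDRXui, element offsets 1 and 2) pass the adjacency check below and are
/// candidates for a later `ldp x0, x1, [x8, #8]`.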
4923 bool AArch64InstrInfo::shouldClusterMemOps(
4924 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4925 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4926 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4927 unsigned NumBytes) const {
4928 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4929 const MachineOperand &BaseOp1 = *BaseOps1.front();
4930 const MachineOperand &BaseOp2 = *BaseOps2.front();
4931 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4932 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4933 if (BaseOp1.getType() != BaseOp2.getType())
4934 return false;
4935
4936 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4937 "Only base registers and frame indices are supported.");
4938
4939 // Check for both base regs and base FI.
4940 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4941 return false;
4942
4943 // Only cluster up to a single pair.
4944 if (ClusterSize > 2)
4945 return false;
4946
4947 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4948 return false;
4949
4950 // Can we pair these instructions based on their opcodes?
4951 unsigned FirstOpc = FirstLdSt.getOpcode();
4952 unsigned SecondOpc = SecondLdSt.getOpcode();
4953 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4954 return false;
4955
4956 // Can't merge volatiles or load/stores that have a hint to avoid pair
4957 // formation, for example.
4958 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4959 !isCandidateToMergeOrPair(SecondLdSt))
4960 return false;
4961
4962 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4963 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4964 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4965 return false;
4966
4967 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4968 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4969 return false;
4970
4971 // Pairwise instructions have a 7-bit signed offset field.
4972 if (Offset1 > 63 || Offset1 < -64)
4973 return false;
4974
4975 // The caller should already have ordered First/SecondLdSt by offset.
4976 // Note: except for non-equal frame index bases
4977 if (BaseOp1.isFI()) {
4978 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4979 "Caller should have ordered offsets.");
4980
4981 const MachineFrameInfo &MFI =
4982 FirstLdSt.getParent()->getParent()->getFrameInfo();
4983 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4984 BaseOp2.getIndex(), Offset2, SecondOpc);
4985 }
4986
4987 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4988
4989 return Offset1 + 1 == Offset2;
4990}
4991
4992 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4993 MCRegister Reg, unsigned SubIdx,
4994 unsigned State,
4995 const TargetRegisterInfo *TRI) {
4996 if (!SubIdx)
4997 return MIB.addReg(Reg, State);
4998
4999 if (Reg.isPhysical())
5000 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5001 return MIB.addReg(Reg, State, SubIdx);
5002}
5003
5004static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5005 unsigned NumRegs) {
5006 // We really want the positive remainder mod 32 here, which happens to be
5007 // easily obtainable with a mask.
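// For example (illustrative): DestReg encoding 0, SrcReg encoding 31,
// NumRegs 2: (0 - 31) & 0x1f == 1 < 2, so a forward (low-to-high) sub-register
// copy would clobber the second source register and the copy is performed in
// reverse instead.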
5008 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5009}
5010
5011 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5012 MachineBasicBlock::iterator I,
5013 const DebugLoc &DL, MCRegister DestReg,
5014 MCRegister SrcReg, bool KillSrc,
5015 unsigned Opcode,
5016 ArrayRef<unsigned> Indices) const {
5017 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5018 const TargetRegisterInfo *TRI = &getRegisterInfo();
5019 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5020 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5021 unsigned NumRegs = Indices.size();
5022
5023 int SubReg = 0, End = NumRegs, Incr = 1;
5024 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5025 SubReg = NumRegs - 1;
5026 End = -1;
5027 Incr = -1;
5028 }
5029
5030 for (; SubReg != End; SubReg += Incr) {
5031 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5032 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5033 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5034 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5035 }
5036}
5037
5038 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5039 MachineBasicBlock::iterator I,
5040 const DebugLoc &DL, MCRegister DestReg,
5041 MCRegister SrcReg, bool KillSrc,
5042 unsigned Opcode, unsigned ZeroReg,
5043 llvm::ArrayRef<unsigned> Indices) const {
5044 const TargetRegisterInfo *TRI = &getRegisterInfo();
5045 unsigned NumRegs = Indices.size();
5046
5047#ifndef NDEBUG
5048 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5049 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5050 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5051 "GPR reg sequences should not be able to overlap");
5052#endif
5053
5054 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5055 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5056 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5057 MIB.addReg(ZeroReg);
5058 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5059 MIB.addImm(0);
5060 }
5061}
5062
5063 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5064 MachineBasicBlock::iterator I,
5065 const DebugLoc &DL, Register DestReg,
5066 Register SrcReg, bool KillSrc,
5067 bool RenamableDest,
5068 bool RenamableSrc) const {
5069 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5070 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5071 const TargetRegisterInfo *TRI = &getRegisterInfo();
5072
5073 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5074 // If either operand is WSP, expand to ADD #0.
5075 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5076 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5077 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5078 MCRegister DestRegX = TRI->getMatchingSuperReg(
5079 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5080 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5081 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5082 // This instruction is reading and writing X registers. This may upset
5083 // the register scavenger and machine verifier, so we need to indicate
5084 // that we are reading an undefined value from SrcRegX, but a proper
5085 // value from SrcReg.
5086 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5087 .addReg(SrcRegX, RegState::Undef)
5088 .addImm(0)
5089 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5090 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5091 } else {
5092 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5093 .addReg(SrcReg, getKillRegState(KillSrc))
5094 .addImm(0)
5095 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5096 }
5097 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5098 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5099 .addImm(0)
5100 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5101 } else {
5102 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5103 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5104 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5105 MCRegister DestRegX = TRI->getMatchingSuperReg(
5106 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5107 assert(DestRegX.isValid() && "Destination super-reg not valid");
5108 MCRegister SrcRegX =
5109 SrcReg == AArch64::WZR
5110 ? AArch64::XZR
5111 : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5112 &AArch64::GPR64spRegClass);
5113 assert(SrcRegX.isValid() && "Source super-reg not valid");
5114 // This instruction is reading and writing X registers. This may upset
5115 // the register scavenger and machine verifier, so we need to indicate
5116 // that we are reading an undefined value from SrcRegX, but a proper
5117 // value from SrcReg.
5118 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5119 .addReg(AArch64::XZR)
5120 .addReg(SrcRegX, RegState::Undef)
5121 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5122 } else {
5123 // Otherwise, expand to ORR WZR.
5124 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5125 .addReg(AArch64::WZR)
5126 .addReg(SrcReg, getKillRegState(KillSrc));
5127 }
5128 }
5129 return;
5130 }
5131
5132 // Copy a Predicate register by ORRing with itself.
5133 if (AArch64::PPRRegClass.contains(DestReg) &&
5134 AArch64::PPRRegClass.contains(SrcReg)) {
5135 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5136 "Unexpected SVE register.");
5137 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5138 .addReg(SrcReg) // Pg
5139 .addReg(SrcReg)
5140 .addReg(SrcReg, getKillRegState(KillSrc));
5141 return;
5142 }
5143
5144 // Copy a predicate-as-counter register by ORRing with itself as if it
5145 // were a regular predicate (mask) register.
5146 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5147 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5148 if (DestIsPNR || SrcIsPNR) {
5149 auto ToPPR = [](MCRegister R) -> MCRegister {
5150 return (R - AArch64::PN0) + AArch64::P0;
5151 };
5152 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5153 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5154
5155 if (PPRSrcReg != PPRDestReg) {
5156 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5157 .addReg(PPRSrcReg) // Pg
5158 .addReg(PPRSrcReg)
5159 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5160 if (DestIsPNR)
5161 NewMI.addDef(DestReg, RegState::Implicit);
5162 }
5163 return;
5164 }
5165
5166 // Copy a Z register by ORRing with itself.
5167 if (AArch64::ZPRRegClass.contains(DestReg) &&
5168 AArch64::ZPRRegClass.contains(SrcReg)) {
5169 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5170 "Unexpected SVE register.");
5171 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5172 .addReg(SrcReg)
5173 .addReg(SrcReg, getKillRegState(KillSrc));
5174 return;
5175 }
5176
5177 // Copy a Z register pair by copying the individual sub-registers.
5178 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5179 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5180 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5181 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5182 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5183 "Unexpected SVE register.");
5184 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5185 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5186 Indices);
5187 return;
5188 }
5189
5190 // Copy a Z register triple by copying the individual sub-registers.
5191 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5192 AArch64::ZPR3RegClass.contains(SrcReg)) {
5193 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5194 "Unexpected SVE register.");
5195 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5196 AArch64::zsub2};
5197 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5198 Indices);
5199 return;
5200 }
5201
5202 // Copy a Z register quad by copying the individual sub-registers.
5203 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5204 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5205 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5206 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5207 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5208 "Unexpected SVE register.");
5209 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5210 AArch64::zsub2, AArch64::zsub3};
5211 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5212 Indices);
5213 return;
5214 }
5215
5216 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5217 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5218 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5219 // If either operand is SP, expand to ADD #0.
5220 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5221 .addReg(SrcReg, getKillRegState(KillSrc))
5222 .addImm(0)
5223 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5224 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5225 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5226 .addImm(0)
5227 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5228 } else {
5229 // Otherwise, expand to ORR XZR.
5230 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5231 .addReg(AArch64::XZR)
5232 .addReg(SrcReg, getKillRegState(KillSrc));
5233 }
5234 return;
5235 }
5236
5237 // Copy a DDDD register quad by copying the individual sub-registers.
5238 if (AArch64::DDDDRegClass.contains(DestReg) &&
5239 AArch64::DDDDRegClass.contains(SrcReg)) {
5240 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5241 AArch64::dsub2, AArch64::dsub3};
5242 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5243 Indices);
5244 return;
5245 }
5246
5247 // Copy a DDD register triple by copying the individual sub-registers.
5248 if (AArch64::DDDRegClass.contains(DestReg) &&
5249 AArch64::DDDRegClass.contains(SrcReg)) {
5250 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5251 AArch64::dsub2};
5252 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5253 Indices);
5254 return;
5255 }
5256
5257 // Copy a DD register pair by copying the individual sub-registers.
5258 if (AArch64::DDRegClass.contains(DestReg) &&
5259 AArch64::DDRegClass.contains(SrcReg)) {
5260 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5261 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5262 Indices);
5263 return;
5264 }
5265
5266 // Copy a QQQQ register quad by copying the individual sub-registers.
5267 if (AArch64::QQQQRegClass.contains(DestReg) &&
5268 AArch64::QQQQRegClass.contains(SrcReg)) {
5269 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5270 AArch64::qsub2, AArch64::qsub3};
5271 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5272 Indices);
5273 return;
5274 }
5275
5276 // Copy a QQQ register triple by copying the individual sub-registers.
5277 if (AArch64::QQQRegClass.contains(DestReg) &&
5278 AArch64::QQQRegClass.contains(SrcReg)) {
5279 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5280 AArch64::qsub2};
5281 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5282 Indices);
5283 return;
5284 }
5285
5286 // Copy a QQ register pair by copying the individual sub-registers.
5287 if (AArch64::QQRegClass.contains(DestReg) &&
5288 AArch64::QQRegClass.contains(SrcReg)) {
5289 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5290 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5291 Indices);
5292 return;
5293 }
5294
5295 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5296 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5297 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5298 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5299 AArch64::XZR, Indices);
5300 return;
5301 }
5302
5303 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5304 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5305 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5306 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5307 AArch64::WZR, Indices);
5308 return;
5309 }
5310
5311 if (AArch64::FPR128RegClass.contains(DestReg) &&
5312 AArch64::FPR128RegClass.contains(SrcReg)) {
5313 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5314 !Subtarget.isNeonAvailable())
5315 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5316 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5317 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5318 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5319 else if (Subtarget.isNeonAvailable())
5320 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5321 .addReg(SrcReg)
5322 .addReg(SrcReg, getKillRegState(KillSrc));
5323 else {
5324 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5325 .addReg(AArch64::SP, RegState::Define)
5326 .addReg(SrcReg, getKillRegState(KillSrc))
5327 .addReg(AArch64::SP)
5328 .addImm(-16);
5329 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5330 .addReg(AArch64::SP, RegState::Define)
5331 .addReg(DestReg, RegState::Define)
5332 .addReg(AArch64::SP)
5333 .addImm(16);
5334 }
5335 return;
5336 }
5337
5338 if (AArch64::FPR64RegClass.contains(DestReg) &&
5339 AArch64::FPR64RegClass.contains(SrcReg)) {
5340 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5341 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5342 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5343 const TargetRegisterInfo *TRI = &getRegisterInfo();
5344 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5345 &AArch64::FPR128RegClass);
5346 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5347 &AArch64::FPR128RegClass);
5348 // This instruction is reading and writing Q registers. This may upset
5349 // the register scavenger and machine verifier, so we need to indicate
5350 // that we are reading an undefined value from SrcRegQ, but a proper
5351 // value from SrcReg.
5352 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5353 .addReg(SrcRegQ, RegState::Undef)
5354 .addReg(SrcRegQ, RegState::Undef)
5355 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5356 } else {
5357 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5358 .addReg(SrcReg, getKillRegState(KillSrc));
5359 }
5360 return;
5361 }
5362
5363 if (AArch64::FPR32RegClass.contains(DestReg) &&
5364 AArch64::FPR32RegClass.contains(SrcReg)) {
5365 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5366 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5367 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5368 const TargetRegisterInfo *TRI = &getRegisterInfo();
5369 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5370 &AArch64::FPR128RegClass);
5371 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5372 &AArch64::FPR128RegClass);
5373 // This instruction is reading and writing Q registers. This may upset
5374 // the register scavenger and machine verifier, so we need to indicate
5375 // that we are reading an undefined value from SrcRegQ, but a proper
5376 // value from SrcReg.
5377 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5378 .addReg(SrcRegQ, RegState::Undef)
5379 .addReg(SrcRegQ, RegState::Undef)
5380 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5381 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5382 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5383 const TargetRegisterInfo *TRI = &getRegisterInfo();
5384 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5385 &AArch64::FPR64RegClass);
5386 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5387 &AArch64::FPR64RegClass);
5388 // This instruction is reading and writing D registers. This may upset
5389 // the register scavenger and machine verifier, so we need to indicate
5390 // that we are reading an undefined value from SrcRegD, but a proper
5391 // value from SrcReg.
5392 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5393 .addReg(SrcRegD, RegState::Undef)
5394 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5395 } else {
5396 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5397 .addReg(SrcReg, getKillRegState(KillSrc));
5398 }
5399 return;
5400 }
5401
5402 if (AArch64::FPR16RegClass.contains(DestReg) &&
5403 AArch64::FPR16RegClass.contains(SrcReg)) {
5404 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5405 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5406 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5407 const TargetRegisterInfo *TRI = &getRegisterInfo();
5408 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5409 &AArch64::FPR128RegClass);
5410 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5411 &AArch64::FPR128RegClass);
5412 // This instruction is reading and writing Q registers. This may upset
5413 // the register scavenger and machine verifier, so we need to indicate
5414 // that we are reading an undefined value from SrcRegQ, but a proper
5415 // value from SrcReg.
5416 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5417 .addReg(SrcRegQ, RegState::Undef)
5418 .addReg(SrcRegQ, RegState::Undef)
5419 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5420 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5421 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5422 const TargetRegisterInfo *TRI = &getRegisterInfo();
5423 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5424 &AArch64::FPR64RegClass);
5425 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5426 &AArch64::FPR64RegClass);
5427 // This instruction is reading and writing D registers. This may upset
5428 // the register scavenger and machine verifier, so we need to indicate
5429 // that we are reading an undefined value from SrcRegD, but a proper
5430 // value from SrcReg.
5431 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5432 .addReg(SrcRegD, RegState::Undef)
5433 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5434 } else {
5435 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5436 &AArch64::FPR32RegClass);
5437 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5438 &AArch64::FPR32RegClass);
5439 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5440 .addReg(SrcReg, getKillRegState(KillSrc));
5441 }
5442 return;
5443 }
5444
5445 if (AArch64::FPR8RegClass.contains(DestReg) &&
5446 AArch64::FPR8RegClass.contains(SrcReg)) {
5447 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5448 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5449 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5451 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5452 &AArch64::FPR128RegClass);
5453 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5454 &AArch64::FPR128RegClass);
5455 // This instruction is reading and writing Q registers. This may upset
5456 // the register scavenger and machine verifier, so we need to indicate
5457 // that we are reading an undefined value from SrcRegQ, but a proper
5458 // value from SrcReg.
5459 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5460 .addReg(SrcRegQ, RegState::Undef)
5461 .addReg(SrcRegQ, RegState::Undef)
5462 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5463 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5464 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5466 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5467 &AArch64::FPR64RegClass);
5468 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5469 &AArch64::FPR64RegClass);
5470 // This instruction is reading and writing D registers. This may upset
5471 // the register scavenger and machine verifier, so we need to indicate
5472 // that we are reading an undefined value from SrcRegD, but a proper
5473 // value from SrcReg.
5474 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5475 .addReg(SrcRegD, RegState::Undef)
5476 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5477 } else {
5478 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5479 &AArch64::FPR32RegClass);
5480 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5481 &AArch64::FPR32RegClass);
5482 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5483 .addReg(SrcReg, getKillRegState(KillSrc));
5484 }
5485 return;
5486 }
5487
5488 // Copies between GPR64 and FPR64.
5489 if (AArch64::FPR64RegClass.contains(DestReg) &&
5490 AArch64::GPR64RegClass.contains(SrcReg)) {
5491 if (AArch64::XZR == SrcReg) {
5492 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5493 } else {
5494 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5495 .addReg(SrcReg, getKillRegState(KillSrc));
5496 }
5497 return;
5498 }
5499 if (AArch64::GPR64RegClass.contains(DestReg) &&
5500 AArch64::FPR64RegClass.contains(SrcReg)) {
5501 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5502 .addReg(SrcReg, getKillRegState(KillSrc));
5503 return;
5504 }
5505 // Copies between GPR32 and FPR32.
5506 if (AArch64::FPR32RegClass.contains(DestReg) &&
5507 AArch64::GPR32RegClass.contains(SrcReg)) {
5508 if (AArch64::WZR == SrcReg) {
5509 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5510 } else {
5511 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5512 .addReg(SrcReg, getKillRegState(KillSrc));
5513 }
5514 return;
5515 }
5516 if (AArch64::GPR32RegClass.contains(DestReg) &&
5517 AArch64::FPR32RegClass.contains(SrcReg)) {
5518 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5519 .addReg(SrcReg, getKillRegState(KillSrc));
5520 return;
5521 }
5522
5523 if (DestReg == AArch64::NZCV) {
5524 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5525 BuildMI(MBB, I, DL, get(AArch64::MSR))
5526 .addImm(AArch64SysReg::NZCV)
5527 .addReg(SrcReg, getKillRegState(KillSrc))
5528 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5529 return;
5530 }
5531
5532 if (SrcReg == AArch64::NZCV) {
5533 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5534 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5535 .addImm(AArch64SysReg::NZCV)
5536 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5537 return;
5538 }
5539
5540#ifndef NDEBUG
5542 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5543 << TRI.getRegAsmName(SrcReg) << "\n";
5544#endif
5545 llvm_unreachable("unimplemented reg-to-reg copy");
5546}
5547
5550 MachineBasicBlock::iterator InsertBefore,
5551 const MCInstrDesc &MCID,
5552 Register SrcReg, bool IsKill,
5553 unsigned SubIdx0, unsigned SubIdx1, int FI,
5554 MachineMemOperand *MMO) {
5555 Register SrcReg0 = SrcReg;
5556 Register SrcReg1 = SrcReg;
5557 if (SrcReg.isPhysical()) {
5558 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5559 SubIdx0 = 0;
5560 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5561 SubIdx1 = 0;
5562 }
5563 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5564 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5565 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5566 .addFrameIndex(FI)
5567 .addImm(0)
5568 .addMemOperand(MMO);
5569}
5570
5573 Register SrcReg, bool isKill, int FI,
5574 const TargetRegisterClass *RC,
5575 const TargetRegisterInfo *TRI,
5576 Register VReg,
5577 MachineInstr::MIFlag Flags) const {
5578 MachineFunction &MF = *MBB.getParent();
5579 MachineFrameInfo &MFI = MF.getFrameInfo();
5580
5582 MachineMemOperand *MMO =
5584 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5585 unsigned Opc = 0;
5586 bool Offset = true;
5588 unsigned StackID = TargetStackID::Default;
5589 switch (TRI->getSpillSize(*RC)) {
5590 case 1:
5591 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5592 Opc = AArch64::STRBui;
5593 break;
5594 case 2: {
5595 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5596 Opc = AArch64::STRHui;
5597 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5598 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5599 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5600 "Unexpected register store without SVE store instructions");
5601 Opc = AArch64::STR_PXI;
5603 }
5604 break;
5605 }
5606 case 4:
5607 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5608 Opc = AArch64::STRWui;
5609 if (SrcReg.isVirtual())
5610 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5611 else
5612 assert(SrcReg != AArch64::WSP);
5613 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5614 Opc = AArch64::STRSui;
5615 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5616 Opc = AArch64::STR_PPXI;
5618 }
5619 break;
5620 case 8:
5621 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5622 Opc = AArch64::STRXui;
5623 if (SrcReg.isVirtual())
5624 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5625 else
5626 assert(SrcReg != AArch64::SP);
5627 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5628 Opc = AArch64::STRDui;
5629 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5631 get(AArch64::STPWi), SrcReg, isKill,
5632 AArch64::sube32, AArch64::subo32, FI, MMO);
5633 return;
5634 }
5635 break;
5636 case 16:
5637 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5638 Opc = AArch64::STRQui;
5639 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5640 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5641 Opc = AArch64::ST1Twov1d;
5642 Offset = false;
5643 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5645 get(AArch64::STPXi), SrcReg, isKill,
5646 AArch64::sube64, AArch64::subo64, FI, MMO);
5647 return;
5648 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5649 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5650 "Unexpected register store without SVE store instructions");
5651 Opc = AArch64::STR_ZXI;
5653 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5654 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5655 "Unexpected predicate store without SVE store instructions");
5656 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5658 }
5659 break;
5660 case 24:
5661 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5662 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5663 Opc = AArch64::ST1Threev1d;
5664 Offset = false;
5665 }
5666 break;
5667 case 32:
5668 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5669 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5670 Opc = AArch64::ST1Fourv1d;
5671 Offset = false;
5672 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5673 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5674 Opc = AArch64::ST1Twov2d;
5675 Offset = false;
5676 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5677 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5678 "Unexpected register store without SVE store instructions");
5679 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5681 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5682 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5683 "Unexpected register store without SVE store instructions");
5684 Opc = AArch64::STR_ZZXI;
5686 }
5687 break;
5688 case 48:
5689 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5690 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5691 Opc = AArch64::ST1Threev2d;
5692 Offset = false;
5693 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5694 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5695 "Unexpected register store without SVE store instructions");
5696 Opc = AArch64::STR_ZZZXI;
5698 }
5699 break;
5700 case 64:
5701 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5702 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5703 Opc = AArch64::ST1Fourv2d;
5704 Offset = false;
5705 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5706 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5707 "Unexpected register store without SVE store instructions");
5708 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5710 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5711 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5712 "Unexpected register store without SVE store instructions");
5713 Opc = AArch64::STR_ZZZZXI;
5715 }
5716 break;
5717 }
5718 assert(Opc && "Unknown register class");
5719 MFI.setStackID(FI, StackID);
5720
5722 .addReg(SrcReg, getKillRegState(isKill))
5723 .addFrameIndex(FI);
5724
5725 if (Offset)
5726 MI.addImm(0);
5727 if (PNRReg.isValid())
5728 MI.addDef(PNRReg, RegState::Implicit);
5729 MI.addMemOperand(MMO);
5730}
5731
5734 MachineBasicBlock::iterator InsertBefore,
5735 const MCInstrDesc &MCID,
5736 Register DestReg, unsigned SubIdx0,
5737 unsigned SubIdx1, int FI,
5738 MachineMemOperand *MMO) {
5739 Register DestReg0 = DestReg;
5740 Register DestReg1 = DestReg;
5741 bool IsUndef = true;
5742 if (DestReg.isPhysical()) {
5743 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5744 SubIdx0 = 0;
5745 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5746 SubIdx1 = 0;
5747 IsUndef = false;
5748 }
5749 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5750 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5751 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5752 .addFrameIndex(FI)
5753 .addImm(0)
5754 .addMemOperand(MMO);
5755}
5756
5759 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5760 Register VReg, MachineInstr::MIFlag Flags) const {
5761 MachineFunction &MF = *MBB.getParent();
5762 MachineFrameInfo &MFI = MF.getFrameInfo();
5764 MachineMemOperand *MMO =
5766 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5767
5768 unsigned Opc = 0;
5769 bool Offset = true;
5770 unsigned StackID = TargetStackID::Default;
5772 switch (TRI->getSpillSize(*RC)) {
5773 case 1:
5774 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5775 Opc = AArch64::LDRBui;
5776 break;
5777 case 2: {
5778 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5779 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5780 Opc = AArch64::LDRHui;
5781 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5782 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5783 "Unexpected register load without SVE load instructions");
5784 if (IsPNR)
5785 PNRReg = DestReg;
5786 Opc = AArch64::LDR_PXI;
5788 }
5789 break;
5790 }
5791 case 4:
5792 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5793 Opc = AArch64::LDRWui;
5794 if (DestReg.isVirtual())
5795 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5796 else
5797 assert(DestReg != AArch64::WSP);
5798 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5799 Opc = AArch64::LDRSui;
5800 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5801 Opc = AArch64::LDR_PPXI;
5803 }
5804 break;
5805 case 8:
5806 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5807 Opc = AArch64::LDRXui;
5808 if (DestReg.isVirtual())
5809 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5810 else
5811 assert(DestReg != AArch64::SP);
5812 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5813 Opc = AArch64::LDRDui;
5814 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5816 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5817 AArch64::subo32, FI, MMO);
5818 return;
5819 }
5820 break;
5821 case 16:
5822 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5823 Opc = AArch64::LDRQui;
5824 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5825 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5826 Opc = AArch64::LD1Twov1d;
5827 Offset = false;
5828 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5830 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5831 AArch64::subo64, FI, MMO);
5832 return;
5833 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5834 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5835 "Unexpected register load without SVE load instructions");
5836 Opc = AArch64::LDR_ZXI;
5838 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5839 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5840 "Unexpected predicate load without SVE load instructions");
5841 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5843 }
5844 break;
5845 case 24:
5846 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5847 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5848 Opc = AArch64::LD1Threev1d;
5849 Offset = false;
5850 }
5851 break;
5852 case 32:
5853 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5854 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5855 Opc = AArch64::LD1Fourv1d;
5856 Offset = false;
5857 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5858 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5859 Opc = AArch64::LD1Twov2d;
5860 Offset = false;
5861 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5862 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5863 "Unexpected register load without SVE load instructions");
5864 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5866 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5867 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5868 "Unexpected register load without SVE load instructions");
5869 Opc = AArch64::LDR_ZZXI;
5871 }
5872 break;
5873 case 48:
5874 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5875 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5876 Opc = AArch64::LD1Threev2d;
5877 Offset = false;
5878 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5879 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5880 "Unexpected register load without SVE load instructions");
5881 Opc = AArch64::LDR_ZZZXI;
5883 }
5884 break;
5885 case 64:
5886 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5887 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5888 Opc = AArch64::LD1Fourv2d;
5889 Offset = false;
5890 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5891 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5892 "Unexpected register load without SVE load instructions");
5893 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5895 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5896 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5897 "Unexpected register load without SVE load instructions");
5898 Opc = AArch64::LDR_ZZZZXI;
5900 }
5901 break;
5902 }
5903
5904 assert(Opc && "Unknown register class");
5905 MFI.setStackID(FI, StackID);
5906
5908 .addReg(DestReg, getDefRegState(true))
5909 .addFrameIndex(FI);
5910 if (Offset)
5911 MI.addImm(0);
5912 if (PNRReg.isValid() && !PNRReg.isVirtual())
5913 MI.addDef(PNRReg, RegState::Implicit);
5914 MI.addMemOperand(MMO);
5915}
5916
5918 const MachineInstr &UseMI,
5919 const TargetRegisterInfo *TRI) {
5920 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5921 UseMI.getIterator()),
5922 [TRI](const MachineInstr &I) {
5923 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5924 I.readsRegister(AArch64::NZCV, TRI);
5925 });
5926}
5927
5928void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5929 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5930 // The smallest scalable elements supported by scaled SVE addressing
5931 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5932 // byte offset must always be a multiple of 2.
5933 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5934
5935 // VGSized offsets are divided by '2', because the VG register holds the
5936 // number of 64-bit granules, as opposed to the 128-bit vector chunks by
5937 // which the 'n' in e.g. MVT::nxv1i8 is modelled.
5938 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5939 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5940 ByteSized = Offset.getFixed();
5941 VGSized = Offset.getScalable() / 2;
5942}
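// Worked example (values chosen for illustration): a StackOffset with 16
// fixed bytes and 32 scalable bytes yields ByteSized = 16 and
// VGSized = 32 / 2 = 16, so the resulting DWARF expression describes the
// offset as 16 + VG * 16 bytes.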
5943
5944/// Returns the offset in parts to which this frame offset can be
5945/// decomposed for the purpose of describing a frame offset.
5946/// For non-scalable offsets this is simply its byte size.
5947void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5948 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5949 int64_t &NumDataVectors) {
5950 // The smallest scalable elements supported by scaled SVE addressing
5951 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5952 // byte offset must always be a multiple of 2.
5953 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5954
5955 NumBytes = Offset.getFixed();
5956 NumDataVectors = 0;
5957 NumPredicateVectors = Offset.getScalable() / 2;
5958 // This method is used to get the offsets to adjust the frame offset.
5959 // If the function requires ADDPL to be used and needs more than two ADDPL
5960 // instructions, part of the offset is folded into NumDataVectors so that it
5961 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5962 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5963 NumPredicateVectors > 62) {
5964 NumDataVectors = NumPredicateVectors / 8;
5965 NumPredicateVectors -= NumDataVectors * 8;
5966 }
5967}
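// Worked examples (values chosen for illustration):
//  * a scalable offset of 34 bytes gives NumPredicateVectors = 17 and
//    NumDataVectors = 0, since 17 is not a multiple of 8 and fits in the
//    ADDPL immediate range;
//  * a scalable offset of 144 bytes gives 72 predicate vectors, which is a
//    multiple of 8, so it is folded into NumDataVectors = 9 and
//    NumPredicateVectors = 0.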
5968
5969// Convenience function to create a DWARF expression for: Constant `Operation`.
5970 // This helper emits compact sequences for common cases. For example, for
5971 // `-15 DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
5974 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5975 // -Constant (1 to 31)
5976 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5977 Operation = dwarf::DW_OP_minus;
5978 } else if (Constant >= 0 && Constant <= 31) {
5979 // Literal value 0 to 31
5980 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5981 } else {
5982 // Signed constant
5983 Expr.push_back(dwarf::DW_OP_consts);
5985 }
5986 return Expr.push_back(Operation);
5987}
5988
5989// Convenience function to create a DWARF expression for a register.
5990static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5991 Expr.push_back((char)dwarf::DW_OP_bregx);
5993 Expr.push_back(0);
5994}
5995
5996// Convenience function to create a DWARF expression for loading a register from
5997// a CFA offset.
5999 int64_t OffsetFromDefCFA) {
6000 // This assumes the top of the DWARF stack contains the CFA.
6001 Expr.push_back(dwarf::DW_OP_dup);
6002 // Add the offset to the register.
6003 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6004 // Dereference the address (loads a 64-bit value).
6005 Expr.push_back(dwarf::DW_OP_deref);
6006}
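// For illustration, appendLoadRegExpr(Expr, 8) appends
// DW_OP_dup, DW_OP_lit8, DW_OP_plus, DW_OP_deref, i.e. "load the 64-bit value
// stored at CFA + 8" while leaving the CFA itself on the DWARF stack.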
6007
6008// Convenience function to create a comment for
6009// (+/-) NumBytes (* RegScale)?
6010static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6011 StringRef RegScale = {}) {
6012 if (NumBytes) {
6013 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6014 if (!RegScale.empty())
6015 Comment << ' ' << RegScale;
6016 }
6017}
6018
6019// Creates an MCCFIInstruction:
6020// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6022 unsigned Reg,
6023 const StackOffset &Offset) {
6024 int64_t NumBytes, NumVGScaledBytes;
6025 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6026 NumVGScaledBytes);
6027 std::string CommentBuffer;
6028 llvm::raw_string_ostream Comment(CommentBuffer);
6029
6030 if (Reg == AArch64::SP)
6031 Comment << "sp";
6032 else if (Reg == AArch64::FP)
6033 Comment << "fp";
6034 else
6035 Comment << printReg(Reg, &TRI);
6036
6037 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6038 SmallString<64> Expr;
6039 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6040 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6041 // Reg + NumBytes
6042 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6043 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6044 appendOffsetComment(NumBytes, Comment);
6045 if (NumVGScaledBytes) {
6046 // + VG * NumVGScaledBytes
6047 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6048 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6049 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6050 Expr.push_back(dwarf::DW_OP_plus);
6051 }
6052
6053 // Wrap this into DW_CFA_def_cfa.
6054 SmallString<64> DefCfaExpr;
6055 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6056 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6057 DefCfaExpr.append(Expr.str());
6058 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6059 Comment.str());
6060}
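// For illustration, with Reg == AArch64::SP and a StackOffset of 16 fixed
// bytes plus 16 scalable bytes, the comment reads "sp + 16 + 8 * VG" and the
// escaped expression is roughly
//   DW_OP_breg31 +16, DW_OP_bregx VG 0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// (DWARF register numbers shown symbolically), wrapped in
// DW_CFA_def_cfa_expression.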
6061
6063 unsigned FrameReg, unsigned Reg,
6064 const StackOffset &Offset,
6065 bool LastAdjustmentWasScalable) {
6066 if (Offset.getScalable())
6067 return createDefCFAExpression(TRI, Reg, Offset);
6068
6069 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6070 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6071
6072 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6073 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6074}
6075
6078 const StackOffset &OffsetFromDefCFA,
6079 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6080 int64_t NumBytes, NumVGScaledBytes;
6081 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6082 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6083
6084 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6085
6086 // Non-scalable offsets can use DW_CFA_offset directly.
6087 if (!NumVGScaledBytes)
6088 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6089
6090 std::string CommentBuffer;
6091 llvm::raw_string_ostream Comment(CommentBuffer);
6092 Comment << printReg(Reg, &TRI) << " @ cfa";
6093
6094 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6095 assert(NumVGScaledBytes && "Expected scalable offset");
6096 SmallString<64> OffsetExpr;
6097 // + VG * NumVGScaledBytes
6098 StringRef VGRegScale;
6099 if (IncomingVGOffsetFromDefCFA) {
6100 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6101 VGRegScale = "* IncomingVG";
6102 } else {
6103 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6104 VGRegScale = "* VG";
6105 }
6106 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6107 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6108 OffsetExpr.push_back(dwarf::DW_OP_plus);
6109 if (NumBytes) {
6110 // + NumBytes
6111 appendOffsetComment(NumBytes, Comment);
6112 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6113 }
6114
6115 // Wrap this into DW_CFA_expression
6116 SmallString<64> CfaExpr;
6117 CfaExpr.push_back(dwarf::DW_CFA_expression);
6118 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6119 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6120 CfaExpr.append(OffsetExpr.str());
6121
6122 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6123 Comment.str());
6124}
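// For illustration, describing a callee save stored at CFA + VG * -16 with no
// fixed part and no incoming-VG slot produces the comment
// "<reg> @ cfa - 16 * VG" and an expression of the form
//   DW_OP_bregx VG 0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus
// wrapped in DW_CFA_expression for the register's DWARF number.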
6125
6126// Helper function to emit a frame offset adjustment from a given
6127// pointer (SrcReg), stored into DestReg. This function is explicit
6128// in that it requires the opcode.
6131 const DebugLoc &DL, unsigned DestReg,
6132 unsigned SrcReg, int64_t Offset, unsigned Opc,
6133 const TargetInstrInfo *TII,
6134 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6135 bool *HasWinCFI, bool EmitCFAOffset,
6136 StackOffset CFAOffset, unsigned FrameReg) {
6137 int Sign = 1;
6138 unsigned MaxEncoding, ShiftSize;
6139 switch (Opc) {
6140 case AArch64::ADDXri:
6141 case AArch64::ADDSXri:
6142 case AArch64::SUBXri:
6143 case AArch64::SUBSXri:
6144 MaxEncoding = 0xfff;
6145 ShiftSize = 12;
6146 break;
6147 case AArch64::ADDVL_XXI:
6148 case AArch64::ADDPL_XXI:
6149 case AArch64::ADDSVL_XXI:
6150 case AArch64::ADDSPL_XXI:
6151 MaxEncoding = 31;
6152 ShiftSize = 0;
6153 if (Offset < 0) {
6154 MaxEncoding = 32;
6155 Sign = -1;
6156 Offset = -Offset;
6157 }
6158 break;
6159 default:
6160 llvm_unreachable("Unsupported opcode");
6161 }
6162
6163 // `Offset` can be in bytes or in "scalable bytes".
6164 int VScale = 1;
6165 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6166 VScale = 16;
6167 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6168 VScale = 2;
6169
6170 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6171 // scratch register. If DestReg is a virtual register, use it as the
6172 // scratch register; otherwise, create a new virtual register (to be
6173 // replaced by the scavenger at the end of PEI). That case can be optimized
6174 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6175 // register can be loaded with offset%8 and the add/sub can use an extending
6176 // instruction with LSL#3.
6177 // Currently the function handles any offsets but generates a poor sequence
6178 // of code.
6179 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6180
6181 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6182 Register TmpReg = DestReg;
6183 if (TmpReg == AArch64::XZR)
6184 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6185 &AArch64::GPR64RegClass);
6186 do {
6187 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6188 unsigned LocalShiftSize = 0;
6189 if (ThisVal > MaxEncoding) {
6190 ThisVal = ThisVal >> ShiftSize;
6191 LocalShiftSize = ShiftSize;
6192 }
6193 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6194 "Encoding cannot handle value that big");
6195
6196 Offset -= ThisVal << LocalShiftSize;
6197 if (Offset == 0)
6198 TmpReg = DestReg;
6199 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6200 .addReg(SrcReg)
6201 .addImm(Sign * (int)ThisVal);
6202 if (ShiftSize)
6203 MBI = MBI.addImm(
6205 MBI = MBI.setMIFlag(Flag);
6206
6207 auto Change =
6208 VScale == 1
6209 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6210 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6211 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6212 CFAOffset += Change;
6213 else
6214 CFAOffset -= Change;
6215 if (EmitCFAOffset && DestReg == TmpReg) {
6216 MachineFunction &MF = *MBB.getParent();
6217 const TargetSubtargetInfo &STI = MF.getSubtarget();
6218 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6219
6220 unsigned CFIIndex = MF.addFrameInst(
6221 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6222 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6223 .addCFIIndex(CFIIndex)
6224 .setMIFlags(Flag);
6225 }
6226
6227 if (NeedsWinCFI) {
6228 int Imm = (int)(ThisVal << LocalShiftSize);
6229 if (VScale != 1 && DestReg == AArch64::SP) {
6230 if (HasWinCFI)
6231 *HasWinCFI = true;
6232 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6233 .addImm(ThisVal)
6234 .setMIFlag(Flag);
6235 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6236 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6237 assert(VScale == 1 && "Expected non-scalable operation");
6238 if (HasWinCFI)
6239 *HasWinCFI = true;
6240 if (Imm == 0)
6241 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6242 else
6243 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6244 .addImm(Imm)
6245 .setMIFlag(Flag);
6246 assert(Offset == 0 && "Expected remaining offset to be zero to "
6247 "emit a single SEH directive");
6248 } else if (DestReg == AArch64::SP) {
6249 assert(VScale == 1 && "Expected non-scalable operation");
6250 if (HasWinCFI)
6251 *HasWinCFI = true;
6252 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6253 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6254 .addImm(Imm)
6255 .setMIFlag(Flag);
6256 }
6257 }
6258
6259 SrcReg = TmpReg;
6260 } while (Offset);
6261}
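// For illustration, a fixed adjustment of 0x1234 bytes via AArch64::ADDXri is
// split across two iterations of the loop above, since only 12 bits
// (optionally shifted left by 12) can be encoded per instruction:
//   add xD, xS, #1, lsl #12   // adds 0x1000
//   add xD, xD, #0x234        // adds the remaining 0x234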
6262
6265 unsigned DestReg, unsigned SrcReg,
6267 MachineInstr::MIFlag Flag, bool SetNZCV,
6268 bool NeedsWinCFI, bool *HasWinCFI,
6269 bool EmitCFAOffset, StackOffset CFAOffset,
6270 unsigned FrameReg) {
6271 // If a function is marked as arm_locally_streaming, then the runtime value of
6272 // vscale in the prologue/epilogue is different from the runtime value of
6273 // in the function's body. To avoid having to consider multiple vscales,
6274 // we can use `addsvl` to allocate any scalable stack-slots, which under
6275 // most circumstances will be only locals, not callee-save slots.
6276 const Function &F = MBB.getParent()->getFunction();
6277 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6278
6279 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6280 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6281 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6282
6283 // Insert ADDSXri for scalable offset at the end.
6284 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6285 if (NeedsFinalDefNZCV)
6286 SetNZCV = false;
6287
6288 // First emit non-scalable frame offsets, or a simple 'mov'.
6289 if (Bytes || (!Offset && SrcReg != DestReg)) {
6290 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6291 "SP increment/decrement not 8-byte aligned");
6292 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6293 if (Bytes < 0) {
6294 Bytes = -Bytes;
6295 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6296 }
6297 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6298 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6299 FrameReg);
6300 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6301 ? StackOffset::getFixed(-Bytes)
6302 : StackOffset::getFixed(Bytes);
6303 SrcReg = DestReg;
6304 FrameReg = DestReg;
6305 }
6306
6307 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6308 "WinCFI can't allocate fractions of an SVE data vector");
6309
6310 if (NumDataVectors) {
6311 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6312 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6313 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6314 FrameReg);
6315 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6316 SrcReg = DestReg;
6317 }
6318
6319 if (NumPredicateVectors) {
6320 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6321 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6322 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6323 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6324 FrameReg);
6325 }
6326
6327 if (NeedsFinalDefNZCV)
6328 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6329 .addReg(DestReg)
6330 .addImm(0)
6331 .addImm(0);
6332}
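// For illustration, emitFrameOffset with StackOffset::get(/*Fixed=*/16,
// /*Scalable=*/32) decomposes into 16 bytes plus two SVE data vectors and
// (outside a locally-streaming body) emits roughly:
//   add   xD, xS, #16
//   addvl xD, xD, #2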
6333
6336 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6337 LiveIntervals *LIS, VirtRegMap *VRM) const {
6338 // This is a bit of a hack. Consider this instruction:
6339 //
6340 // %0 = COPY %sp; GPR64all:%0
6341 //
6342 // We explicitly chose GPR64all for the virtual register so such a copy might
6343 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6344 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6345 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6346 //
6347 // To prevent that, we are going to constrain the %0 register class here.
6348 if (MI.isFullCopy()) {
6349 Register DstReg = MI.getOperand(0).getReg();
6350 Register SrcReg = MI.getOperand(1).getReg();
6351 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6352 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6353 return nullptr;
6354 }
6355 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6356 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6357 return nullptr;
6358 }
6359 // Nothing can be folded with a copy from/to NZCV.
6360 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6361 return nullptr;
6362 }
6363
6364 // Handle the case where a copy is being spilled or filled but the source
6365 // and destination register class don't match. For example:
6366 //
6367 // %0 = COPY %xzr; GPR64common:%0
6368 //
6369 // In this case we can still safely fold away the COPY and generate the
6370 // following spill code:
6371 //
6372 // STRXui %xzr, %stack.0
6373 //
6374 // This also eliminates spilled cross register class COPYs (e.g. between x and
6375 // d regs) of the same size. For example:
6376 //
6377 // %0 = COPY %1; GPR64:%0, FPR64:%1
6378 //
6379 // will be filled as
6380 //
6381 // LDRDui %0, fi<#0>
6382 //
6383 // instead of
6384 //
6385 // LDRXui %Temp, fi<#0>
6386 // %0 = FMOV %Temp
6387 //
6388 if (MI.isCopy() && Ops.size() == 1 &&
6389 // Make sure we're only folding the explicit COPY defs/uses.
6390 (Ops[0] == 0 || Ops[0] == 1)) {
6391 bool IsSpill = Ops[0] == 0;
6392 bool IsFill = !IsSpill;
6394 const MachineRegisterInfo &MRI = MF.getRegInfo();
6395 MachineBasicBlock &MBB = *MI.getParent();
6396 const MachineOperand &DstMO = MI.getOperand(0);
6397 const MachineOperand &SrcMO = MI.getOperand(1);
6398 Register DstReg = DstMO.getReg();
6399 Register SrcReg = SrcMO.getReg();
6400 // This is slightly expensive to compute for physical regs since
6401 // getMinimalPhysRegClass is slow.
6402 auto getRegClass = [&](unsigned Reg) {
6403 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6404 : TRI.getMinimalPhysRegClass(Reg);
6405 };
6406
6407 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6408 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6409 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6410 "Mismatched register size in non subreg COPY");
6411 if (IsSpill)
6412 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6413 getRegClass(SrcReg), &TRI, Register());
6414 else
6415 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6416 getRegClass(DstReg), &TRI, Register());
6417 return &*--InsertPt;
6418 }
6419
6420 // Handle cases like spilling def of:
6421 //
6422 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6423 //
6424 // where the physical register source can be widened and stored to the full
6425 // virtual reg destination stack slot, in this case producing:
6426 //
6427 // STRXui %xzr, %stack.0
6428 //
6429 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6430 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6431 assert(SrcMO.getSubReg() == 0 &&
6432 "Unexpected subreg on physical register");
6433 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6434 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6435 Register());
6436 return &*--InsertPt;
6437 }
6438
6439 // Handle cases like filling use of:
6440 //
6441 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6442 //
6443 // where we can load the full virtual reg source stack slot, into the subreg
6444 // destination, in this case producing:
6445 //
6446 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6447 //
6448 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6449 const TargetRegisterClass *FillRC = nullptr;
6450 switch (DstMO.getSubReg()) {
6451 default:
6452 break;
6453 case AArch64::sub_32:
6454 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6455 FillRC = &AArch64::GPR32RegClass;
6456 break;
6457 case AArch64::ssub:
6458 FillRC = &AArch64::FPR32RegClass;
6459 break;
6460 case AArch64::dsub:
6461 FillRC = &AArch64::FPR64RegClass;
6462 break;
6463 }
6464
6465 if (FillRC) {
6466 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6467 TRI.getRegSizeInBits(*FillRC) &&
6468 "Mismatched regclass size on folded subreg COPY");
6469 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6470 Register());
6471 MachineInstr &LoadMI = *--InsertPt;
6472 MachineOperand &LoadDst = LoadMI.getOperand(0);
6473 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6474 LoadDst.setSubReg(DstMO.getSubReg());
6475 LoadDst.setIsUndef();
6476 return &LoadMI;
6477 }
6478 }
6479 }
6480
6481 // Cannot fold.
6482 return nullptr;
6483}
6484
6486 StackOffset &SOffset,
6487 bool *OutUseUnscaledOp,
6488 unsigned *OutUnscaledOp,
6489 int64_t *EmittableOffset) {
6490 // Set output values in case of early exit.
6491 if (EmittableOffset)
6492 *EmittableOffset = 0;
6493 if (OutUseUnscaledOp)
6494 *OutUseUnscaledOp = false;
6495 if (OutUnscaledOp)
6496 *OutUnscaledOp = 0;
6497
6498 // Exit early for structured vector spills/fills as they can't take an
6499 // immediate offset.
6500 switch (MI.getOpcode()) {
6501 default:
6502 break;
6503 case AArch64::LD1Rv1d:
6504 case AArch64::LD1Rv2s:
6505 case AArch64::LD1Rv2d:
6506 case AArch64::LD1Rv4h:
6507 case AArch64::LD1Rv4s:
6508 case AArch64::LD1Rv8b:
6509 case AArch64::LD1Rv8h:
6510 case AArch64::LD1Rv16b:
6511 case AArch64::LD1Twov2d:
6512 case AArch64::LD1Threev2d:
6513 case AArch64::LD1Fourv2d:
6514 case AArch64::LD1Twov1d:
6515 case AArch64::LD1Threev1d:
6516 case AArch64::LD1Fourv1d:
6517 case AArch64::ST1Twov2d:
6518 case AArch64::ST1Threev2d:
6519 case AArch64::ST1Fourv2d:
6520 case AArch64::ST1Twov1d:
6521 case AArch64::ST1Threev1d:
6522 case AArch64::ST1Fourv1d:
6523 case AArch64::ST1i8:
6524 case AArch64::ST1i16:
6525 case AArch64::ST1i32:
6526 case AArch64::ST1i64:
6527 case AArch64::IRG:
6528 case AArch64::IRGstack:
6529 case AArch64::STGloop:
6530 case AArch64::STZGloop:
6532 }
6533
6534 // Get the min/max offset and the scale.
6535 TypeSize ScaleValue(0U, false), Width(0U, false);
6536 int64_t MinOff, MaxOff;
6537 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6538 MaxOff))
6539 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6540
6541 // Construct the complete offset.
6542 bool IsMulVL = ScaleValue.isScalable();
6543 unsigned Scale = ScaleValue.getKnownMinValue();
6544 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6545
6546 const MachineOperand &ImmOpnd =
6547 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6548 Offset += ImmOpnd.getImm() * Scale;
6549
6550 // If the offset doesn't match the scale, we rewrite the instruction to
6551 // use the unscaled instruction instead. Likewise, if we have a negative
6552 // offset and there is an unscaled op to use.
6553 std::optional<unsigned> UnscaledOp =
6555 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6556 if (useUnscaledOp &&
6557 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6558 MaxOff))
6559 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6560
6561 Scale = ScaleValue.getKnownMinValue();
6562 assert(IsMulVL == ScaleValue.isScalable() &&
6563 "Unscaled opcode has different value for scalable");
6564
6565 int64_t Remainder = Offset % Scale;
6566 assert(!(Remainder && useUnscaledOp) &&
6567 "Cannot have remainder when using unscaled op");
6568
6569 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6570 int64_t NewOffset = Offset / Scale;
6571 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6572 Offset = Remainder;
6573 else {
6574 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6575 Offset = Offset - (NewOffset * Scale);
6576 }
6577
6578 if (EmittableOffset)
6579 *EmittableOffset = NewOffset;
6580 if (OutUseUnscaledOp)
6581 *OutUseUnscaledOp = useUnscaledOp;
6582 if (OutUnscaledOp && UnscaledOp)
6583 *OutUnscaledOp = *UnscaledOp;
6584
6585 if (IsMulVL)
6586 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6587 else
6588 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6590 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6591}
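// For illustration, an LDRXui (scale 8) whose accumulated byte offset comes to
// 20 cannot stay scaled, so this reports the unscaled LDURXi form with an
// emittable offset of 20 and no leftover offset; rewriteAArch64FrameIndex
// below then swaps in the unscaled opcode and immediate.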
6592
6594 unsigned FrameReg, StackOffset &Offset,
6595 const AArch64InstrInfo *TII) {
6596 unsigned Opcode = MI.getOpcode();
6597 unsigned ImmIdx = FrameRegIdx + 1;
6598
6599 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6600 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6601 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6602 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6603 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6604 MI.eraseFromParent();
6605 Offset = StackOffset();
6606 return true;
6607 }
6608
6609 int64_t NewOffset;
6610 unsigned UnscaledOp;
6611 bool UseUnscaledOp;
6612 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6613 &UnscaledOp, &NewOffset);
6616 // Replace the FrameIndex with FrameReg.
6617 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6618 if (UseUnscaledOp)
6619 MI.setDesc(TII->get(UnscaledOp));
6620
6621 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6622 return !Offset;
6623 }
6624
6625 return false;
6626}
6627
6633
6635 return MCInstBuilder(AArch64::HINT).addImm(0);
6636}
6637
6638// AArch64 supports MachineCombiner.
6639bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6640
6641 // True when Opc sets the NZCV flags.
6642static bool isCombineInstrSettingFlag(unsigned Opc) {
6643 switch (Opc) {
6644 case AArch64::ADDSWrr:
6645 case AArch64::ADDSWri:
6646 case AArch64::ADDSXrr:
6647 case AArch64::ADDSXri:
6648 case AArch64::SUBSWrr:
6649 case AArch64::SUBSXrr:
6650 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6651 case AArch64::SUBSWri:
6652 case AArch64::SUBSXri:
6653 return true;
6654 default:
6655 break;
6656 }
6657 return false;
6658}
6659
6660// 32b Opcodes that can be combined with a MUL
6661static bool isCombineInstrCandidate32(unsigned Opc) {
6662 switch (Opc) {
6663 case AArch64::ADDWrr:
6664 case AArch64::ADDWri:
6665 case AArch64::SUBWrr:
6666 case AArch64::ADDSWrr:
6667 case AArch64::ADDSWri:
6668 case AArch64::SUBSWrr:
6669 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6670 case AArch64::SUBWri:
6671 case AArch64::SUBSWri:
6672 return true;
6673 default:
6674 break;
6675 }
6676 return false;
6677}
6678
6679// 64b Opcodes that can be combined with a MUL
6680static bool isCombineInstrCandidate64(unsigned Opc) {
6681 switch (Opc) {
6682 case AArch64::ADDXrr:
6683 case AArch64::ADDXri:
6684 case AArch64::SUBXrr:
6685 case AArch64::ADDSXrr:
6686 case AArch64::ADDSXri:
6687 case AArch64::SUBSXrr:
6688 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6689 case AArch64::SUBXri:
6690 case AArch64::SUBSXri:
6691 case AArch64::ADDv8i8:
6692 case AArch64::ADDv16i8:
6693 case AArch64::ADDv4i16:
6694 case AArch64::ADDv8i16:
6695 case AArch64::ADDv2i32:
6696 case AArch64::ADDv4i32:
6697 case AArch64::SUBv8i8:
6698 case AArch64::SUBv16i8:
6699 case AArch64::SUBv4i16:
6700 case AArch64::SUBv8i16:
6701 case AArch64::SUBv2i32:
6702 case AArch64::SUBv4i32:
6703 return true;
6704 default:
6705 break;
6706 }
6707 return false;
6708}
6709
6710// FP Opcodes that can be combined with a FMUL.
6711static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6712 switch (Inst.getOpcode()) {
6713 default:
6714 break;
6715 case AArch64::FADDHrr:
6716 case AArch64::FADDSrr:
6717 case AArch64::FADDDrr:
6718 case AArch64::FADDv4f16:
6719 case AArch64::FADDv8f16:
6720 case AArch64::FADDv2f32:
6721 case AArch64::FADDv2f64:
6722 case AArch64::FADDv4f32:
6723 case AArch64::FSUBHrr:
6724 case AArch64::FSUBSrr:
6725 case AArch64::FSUBDrr:
6726 case AArch64::FSUBv4f16:
6727 case AArch64::FSUBv8f16:
6728 case AArch64::FSUBv2f32:
6729 case AArch64::FSUBv2f64:
6730 case AArch64::FSUBv4f32:
6732 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6733 // the target options or if FADD/FSUB has the contract fast-math flag.
6734 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6736 }
6737 return false;
6738}
6739
6740// Opcodes that can be combined with a MUL
6744
6745//
6746// Utility routine that checks if \param MO is defined by an
6747// \param CombineOpc instruction in the basic block \param MBB
6749 unsigned CombineOpc, unsigned ZeroReg = 0,
6750 bool CheckZeroReg = false) {
6751 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6752 MachineInstr *MI = nullptr;
6753
6754 if (MO.isReg() && MO.getReg().isVirtual())
6755 MI = MRI.getUniqueVRegDef(MO.getReg());
6756 // And it needs to be in the trace (otherwise, it won't have a depth).
6757 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6758 return false;
6759 // The result must only be used by the instruction we combine with.
6760 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6761 return false;
6762
6763 if (CheckZeroReg) {
6764 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6765 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6766 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6767 // The third input reg must be zero.
6768 if (MI->getOperand(3).getReg() != ZeroReg)
6769 return false;
6770 }
6771
6772 if (isCombineInstrSettingFlag(CombineOpc) &&
6773 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6774 return false;
6775
6776 return true;
6777}
6778
6779//
6780// Is \param MO defined by an integer multiply and can be combined?
6782 unsigned MulOpc, unsigned ZeroReg) {
6783 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6784}
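// For illustration, given MIR along the lines of
//   %3:gpr32 = MADDWrrr %1, %2, $wzr   ; a MUL, i.e. MADD with a WZR addend
//   %4:gpr32 = ADDWrr %0, %3
// canCombineWithMUL(MBB, AddInst.getOperand(2), AArch64::MADDWrrr,
// AArch64::WZR) succeeds (single non-debug use, WZR addend, same block),
// which is how getMaddPatterns below records MCP::MULADDW_OP2.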
6785
6786//
6787// Is \param MO defined by a floating-point multiply and can be combined?
6789 unsigned MulOpc) {
6790 return canCombine(MBB, MO, MulOpc);
6791}
6792
6793// TODO: There are many more machine instruction opcodes to match:
6794// 1. Other data types (integer, vectors)
6795// 2. Other math / logic operations (xor, or)
6796// 3. Other forms of the same operation (intrinsics and other variants)
6797bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6798 bool Invert) const {
6799 if (Invert)
6800 return false;
6801 switch (Inst.getOpcode()) {
6802 // == Floating-point types ==
6803 // -- Floating-point instructions --
6804 case AArch64::FADDHrr:
6805 case AArch64::FADDSrr:
6806 case AArch64::FADDDrr:
6807 case AArch64::FMULHrr:
6808 case AArch64::FMULSrr:
6809 case AArch64::FMULDrr:
6810 case AArch64::FMULX16:
6811 case AArch64::FMULX32:
6812 case AArch64::FMULX64:
6813 // -- Advanced SIMD instructions --
6814 case AArch64::FADDv4f16:
6815 case AArch64::FADDv8f16:
6816 case AArch64::FADDv2f32:
6817 case AArch64::FADDv4f32:
6818 case AArch64::FADDv2f64:
6819 case AArch64::FMULv4f16:
6820 case AArch64::FMULv8f16:
6821 case AArch64::FMULv2f32:
6822 case AArch64::FMULv4f32:
6823 case AArch64::FMULv2f64:
6824 case AArch64::FMULXv4f16:
6825 case AArch64::FMULXv8f16:
6826 case AArch64::FMULXv2f32:
6827 case AArch64::FMULXv4f32:
6828 case AArch64::FMULXv2f64:
6829 // -- SVE instructions --
6830 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6831 // in the SVE instruction set (though there are predicated ones).
6832 case AArch64::FADD_ZZZ_H:
6833 case AArch64::FADD_ZZZ_S:
6834 case AArch64::FADD_ZZZ_D:
6835 case AArch64::FMUL_ZZZ_H:
6836 case AArch64::FMUL_ZZZ_S:
6837 case AArch64::FMUL_ZZZ_D:
6840
6841 // == Integer types ==
6842 // -- Base instructions --
6843 // Opcodes MULWrr and MULXrr don't exist because
6844 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6845 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6846 // The machine-combiner does not support three-source-operand machine
6847 // instructions, so we cannot reassociate MULs.
6848 case AArch64::ADDWrr:
6849 case AArch64::ADDXrr:
6850 case AArch64::ANDWrr:
6851 case AArch64::ANDXrr:
6852 case AArch64::ORRWrr:
6853 case AArch64::ORRXrr:
6854 case AArch64::EORWrr:
6855 case AArch64::EORXrr:
6856 case AArch64::EONWrr:
6857 case AArch64::EONXrr:
6858 // -- Advanced SIMD instructions --
6859 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6860 // in the Advanced SIMD instruction set.
6861 case AArch64::ADDv8i8:
6862 case AArch64::ADDv16i8:
6863 case AArch64::ADDv4i16:
6864 case AArch64::ADDv8i16:
6865 case AArch64::ADDv2i32:
6866 case AArch64::ADDv4i32:
6867 case AArch64::ADDv1i64:
6868 case AArch64::ADDv2i64:
6869 case AArch64::MULv8i8:
6870 case AArch64::MULv16i8:
6871 case AArch64::MULv4i16:
6872 case AArch64::MULv8i16:
6873 case AArch64::MULv2i32:
6874 case AArch64::MULv4i32:
6875 case AArch64::ANDv8i8:
6876 case AArch64::ANDv16i8:
6877 case AArch64::ORRv8i8:
6878 case AArch64::ORRv16i8:
6879 case AArch64::EORv8i8:
6880 case AArch64::EORv16i8:
6881 // -- SVE instructions --
6882 case AArch64::ADD_ZZZ_B:
6883 case AArch64::ADD_ZZZ_H:
6884 case AArch64::ADD_ZZZ_S:
6885 case AArch64::ADD_ZZZ_D:
6886 case AArch64::MUL_ZZZ_B:
6887 case AArch64::MUL_ZZZ_H:
6888 case AArch64::MUL_ZZZ_S:
6889 case AArch64::MUL_ZZZ_D:
6890 case AArch64::AND_ZZZ:
6891 case AArch64::ORR_ZZZ:
6892 case AArch64::EOR_ZZZ:
6893 return true;
6894
6895 default:
6896 return false;
6897 }
6898}
6899
6900/// Find instructions that can be turned into madd.
6902 SmallVectorImpl<unsigned> &Patterns) {
6903 unsigned Opc = Root.getOpcode();
6904 MachineBasicBlock &MBB = *Root.getParent();
6905 bool Found = false;
6906
6908 return false;
6910 int Cmp_NZCV =
6911 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6912 // When NZCV is live, bail out.
6913 if (Cmp_NZCV == -1)
6914 return false;
6915 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6916 // When the opcode can't change, bail out.
6917 // CHECKME: do we miss any cases for opcode conversion?
6918 if (NewOpc == Opc)
6919 return false;
6920 Opc = NewOpc;
6921 }
6922
6923 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6924 unsigned Pattern) {
6925 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6926 Patterns.push_back(Pattern);
6927 Found = true;
6928 }
6929 };
6930
6931 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6932 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6933 Patterns.push_back(Pattern);
6934 Found = true;
6935 }
6936 };
6937
6939
6940 switch (Opc) {
6941 default:
6942 break;
6943 case AArch64::ADDWrr:
6944 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6945 "ADDWrr does not have register operands");
6946 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6947 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6948 break;
6949 case AArch64::ADDXrr:
6950 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6951 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6952 break;
6953 case AArch64::SUBWrr:
6954 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6955 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6956 break;
6957 case AArch64::SUBXrr:
6958 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6959 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6960 break;
6961 case AArch64::ADDWri:
6962 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6963 break;
6964 case AArch64::ADDXri:
6965 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6966 break;
6967 case AArch64::SUBWri:
6968 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6969 break;
6970 case AArch64::SUBXri:
6971 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6972 break;
6973 case AArch64::ADDv8i8:
6974 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6975 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6976 break;
6977 case AArch64::ADDv16i8:
6978 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6979 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6980 break;
6981 case AArch64::ADDv4i16:
6982 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6983 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6984 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6985 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6986 break;
6987 case AArch64::ADDv8i16:
6988 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6989 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6990 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6991 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6992 break;
6993 case AArch64::ADDv2i32:
6994 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6995 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6996 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6997 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6998 break;
6999 case AArch64::ADDv4i32:
7000 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7001 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7002 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7003 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7004 break;
7005 case AArch64::SUBv8i8:
7006 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7007 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7008 break;
7009 case AArch64::SUBv16i8:
7010 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7011 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7012 break;
7013 case AArch64::SUBv4i16:
7014 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7015 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7016 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7017 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7018 break;
7019 case AArch64::SUBv8i16:
7020 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7021 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7022 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7023 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7024 break;
7025 case AArch64::SUBv2i32:
7026 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7027 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7028 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7029 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7030 break;
7031 case AArch64::SUBv4i32:
7032 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7033 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7034 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7035 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7036 break;
7037 }
7038 return Found;
7039}
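// For illustration, a root SUBv4i32 whose second source is a single-use
// MULv4i32_indexed records MCP::MULSUBv4i32_indexed_OP2 above, which the
// machine combiner can later rewrite as a single multiply-subtract.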
7040
7041bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7042 switch (Opcode) {
7043 default:
7044 break;
7045 case AArch64::UABALB_ZZZ_D:
7046 case AArch64::UABALB_ZZZ_H:
7047 case AArch64::UABALB_ZZZ_S:
7048 case AArch64::UABALT_ZZZ_D:
7049 case AArch64::UABALT_ZZZ_H:
7050 case AArch64::UABALT_ZZZ_S:
7051 case AArch64::SABALB_ZZZ_D:
7052 case AArch64::SABALB_ZZZ_S:
7053 case AArch64::SABALB_ZZZ_H:
7054 case AArch64::SABALT_ZZZ_D:
7055 case AArch64::SABALT_ZZZ_S:
7056 case AArch64::SABALT_ZZZ_H:
7057 case AArch64::UABALv16i8_v8i16:
7058 case AArch64::UABALv2i32_v2i64:
7059 case AArch64::UABALv4i16_v4i32:
7060 case AArch64::UABALv4i32_v2i64:
7061 case AArch64::UABALv8i16_v4i32:
7062 case AArch64::UABALv8i8_v8i16:
7063 case AArch64::UABAv16i8:
7064 case AArch64::UABAv2i32:
7065 case AArch64::UABAv4i16:
7066 case AArch64::UABAv4i32:
7067 case AArch64::UABAv8i16:
7068 case AArch64::UABAv8i8:
7069 case AArch64::SABALv16i8_v8i16:
7070 case AArch64::SABALv2i32_v2i64:
7071 case AArch64::SABALv4i16_v4i32:
7072 case AArch64::SABALv4i32_v2i64:
7073 case AArch64::SABALv8i16_v4i32:
7074 case AArch64::SABALv8i8_v8i16:
7075 case AArch64::SABAv16i8:
7076 case AArch64::SABAv2i32:
7077 case AArch64::SABAv4i16:
7078 case AArch64::SABAv4i32:
7079 case AArch64::SABAv8i16:
7080 case AArch64::SABAv8i8:
7081 return true;
7082 }
7083
7084 return false;
7085}
7086
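/// Map an accumulating absolute-difference opcode to its non-accumulating
/// counterpart (for example UABALv8i8_v8i16 -> UABDLv8i8_v8i16 and
/// UABAv16i8 -> UABDv16i8), so that a new accumulation chain can be started
/// without a dependency on the previous accumulator value.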
7087unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7088 unsigned AccumulationOpcode) const {
7089 switch (AccumulationOpcode) {
7090 default:
7091 llvm_unreachable("Unsupported accumulation Opcode!");
7092 case AArch64::UABALB_ZZZ_D:
7093 return AArch64::UABDLB_ZZZ_D;
7094 case AArch64::UABALB_ZZZ_H:
7095 return AArch64::UABDLB_ZZZ_H;
7096 case AArch64::UABALB_ZZZ_S:
7097 return AArch64::UABDLB_ZZZ_S;
7098 case AArch64::UABALT_ZZZ_D:
7099 return AArch64::UABDLT_ZZZ_D;
7100 case AArch64::UABALT_ZZZ_H:
7101 return AArch64::UABDLT_ZZZ_H;
7102 case AArch64::UABALT_ZZZ_S:
7103 return AArch64::UABDLT_ZZZ_S;
7104 case AArch64::UABALv16i8_v8i16:
7105 return AArch64::UABDLv16i8_v8i16;
7106 case AArch64::UABALv2i32_v2i64:
7107 return AArch64::UABDLv2i32_v2i64;
7108 case AArch64::UABALv4i16_v4i32:
7109 return AArch64::UABDLv4i16_v4i32;
7110 case AArch64::UABALv4i32_v2i64:
7111 return AArch64::UABDLv4i32_v2i64;
7112 case AArch64::UABALv8i16_v4i32:
7113 return AArch64::UABDLv8i16_v4i32;
7114 case AArch64::UABALv8i8_v8i16:
7115 return AArch64::UABDLv8i8_v8i16;
7116 case AArch64::UABAv16i8:
7117 return AArch64::UABDv16i8;
7118 case AArch64::UABAv2i32:
7119 return AArch64::UABDv2i32;
7120 case AArch64::UABAv4i16:
7121 return AArch64::UABDv4i16;
7122 case AArch64::UABAv4i32:
7123 return AArch64::UABDv4i32;
7124 case AArch64::UABAv8i16:
7125 return AArch64::UABDv8i16;
7126 case AArch64::UABAv8i8:
7127 return AArch64::UABDv8i8;
7128 case AArch64::SABALB_ZZZ_D:
7129 return AArch64::SABDLB_ZZZ_D;
7130 case AArch64::SABALB_ZZZ_S:
7131 return AArch64::SABDLB_ZZZ_S;
7132 case AArch64::SABALB_ZZZ_H:
7133 return AArch64::SABDLB_ZZZ_H;
7134 case AArch64::SABALT_ZZZ_D:
7135 return AArch64::SABDLT_ZZZ_D;
7136 case AArch64::SABALT_ZZZ_S:
7137 return AArch64::SABDLT_ZZZ_S;
7138 case AArch64::SABALT_ZZZ_H:
7139 return AArch64::SABDLT_ZZZ_H;
7140 case AArch64::SABALv16i8_v8i16:
7141 return AArch64::SABDLv16i8_v8i16;
7142 case AArch64::SABALv2i32_v2i64:
7143 return AArch64::SABDLv2i32_v2i64;
7144 case AArch64::SABALv4i16_v4i32:
7145 return AArch64::SABDLv4i16_v4i32;
7146 case AArch64::SABALv4i32_v2i64:
7147 return AArch64::SABDLv4i32_v2i64;
7148 case AArch64::SABALv8i16_v4i32:
7149 return AArch64::SABDLv8i16_v4i32;
7150 case AArch64::SABALv8i8_v8i16:
7151 return AArch64::SABDLv8i8_v8i16;
7152 case AArch64::SABAv16i8:
7153 return AArch64::SABDv16i8;
7154 case AArch64::SABAv2i32:
7155 return AArch64::SABDv2i32;
7156 case AArch64::SABAv4i16:
7157 return AArch64::SABDv4i16;
7158 case AArch64::SABAv4i32:
7159 return AArch64::SABDv4i32;
7160 case AArch64::SABAv8i16:
7161 return AArch64::SABDv8i16;
7162 case AArch64::SABAv8i8:
7163 return AArch64::SABDv8i8;
7164 }
7165}
7166
7167/// Floating-Point Support
7168
7169/// Find instructions that can be turned into madd.
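/// For example, an FADD or FSUB fed by an FMUL can usually be rewritten as a
/// fused multiply-add/subtract (FMADD/FMSUB, or the vector FMLA/FMLS forms
/// enumerated in the switch below).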
7170static bool getFMAPatterns(MachineInstr &Root,
7171 SmallVectorImpl<unsigned> &Patterns) {
7172
7173 if (!isCombineInstrCandidateFP(Root))
7174 return false;
7175
7176 MachineBasicBlock &MBB = *Root.getParent();
7177 bool Found = false;
7178
7179 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7180 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7181 Patterns.push_back(Pattern);
7182 return true;
7183 }
7184 return false;
7185 };
7186
7187 typedef AArch64MachineCombinerPattern MCP;
7188
7189 switch (Root.getOpcode()) {
7190 default:
7191 assert(false && "Unsupported FP instruction in combiner\n");
7192 break;
7193 case AArch64::FADDHrr:
7194 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7195 "FADDHrr does not have register operands");
7196
7197 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7198 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7199 break;
7200 case AArch64::FADDSrr:
7201 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7202 "FADDSrr does not have register operands");
7203
7204 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7205 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7206
7207 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7208 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7209 break;
7210 case AArch64::FADDDrr:
7211 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7212 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7213
7214 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7215 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7216 break;
7217 case AArch64::FADDv4f16:
7218 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7219 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7220
7221 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7222 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7223 break;
7224 case AArch64::FADDv8f16:
7225 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7226 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7227
7228 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7229 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7230 break;
7231 case AArch64::FADDv2f32:
7232 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7233 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7234
7235 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7236 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7237 break;
7238 case AArch64::FADDv2f64:
7239 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7240 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7241
7242 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7243 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7244 break;
7245 case AArch64::FADDv4f32:
7246 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7247 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7248
7249 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7250 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7251 break;
7252 case AArch64::FSUBHrr:
7253 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7254 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7255 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7256 break;
7257 case AArch64::FSUBSrr:
7258 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7259
7260 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7261 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7262
7263 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7264 break;
7265 case AArch64::FSUBDrr:
7266 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7267
7268 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7269 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7270
7271 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7272 break;
7273 case AArch64::FSUBv4f16:
7274 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7275 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7276
7277 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7278 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7279 break;
7280 case AArch64::FSUBv8f16:
7281 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7282 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7283
7284 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7285 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7286 break;
7287 case AArch64::FSUBv2f32:
7288 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7289 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7290
7291 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7292 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7293 break;
7294 case AArch64::FSUBv2f64:
7295 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7296 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7297
7298 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7299 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7300 break;
7301 case AArch64::FSUBv4f32:
7302 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7303 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7304
7305 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7306 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7307 break;
7308 }
7309 return Found;
7310}
7311
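/// Find FMUL instructions fed by a DUP of a vector lane; such a multiply can
/// be rewritten as a lane-indexed FMUL (see genIndexedMultiply further down).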
7312static bool getFMULPatterns(MachineInstr &Root,
7313 SmallVectorImpl<unsigned> &Patterns) {
7314 MachineBasicBlock &MBB = *Root.getParent();
7315 bool Found = false;
7316
7317 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7318 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7319 MachineOperand &MO = Root.getOperand(Operand);
7320 MachineInstr *MI = nullptr;
7321 if (MO.isReg() && MO.getReg().isVirtual())
7322 MI = MRI.getUniqueVRegDef(MO.getReg());
7323 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7324 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7325 MI->getOperand(1).getReg().isVirtual())
7326 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7327 if (MI && MI->getOpcode() == Opcode) {
7328 Patterns.push_back(Pattern);
7329 return true;
7330 }
7331 return false;
7332 };
7333
7334 typedef AArch64MachineCombinerPattern MCP;
7335
7336 switch (Root.getOpcode()) {
7337 default:
7338 return false;
7339 case AArch64::FMULv2f32:
7340 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7341 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7342 break;
7343 case AArch64::FMULv2f64:
7344 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7345 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7346 break;
7347 case AArch64::FMULv4f16:
7348 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7349 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7350 break;
7351 case AArch64::FMULv4f32:
7352 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7353 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7354 break;
7355 case AArch64::FMULv8f16:
7356 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7357 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7358 break;
7359 }
7360
7361 return Found;
7362}
7363
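/// Find an FNEG whose only operand is a single-use FMADD carrying the
/// required fast-math flags, so that the pair can be combined into one
/// FNMADD.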
7364static bool getFNEGPatterns(MachineInstr &Root,
7365 SmallVectorImpl<unsigned> &Patterns) {
7366 unsigned Opc = Root.getOpcode();
7367 MachineBasicBlock &MBB = *Root.getParent();
7368 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7369
7370 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7371 MachineOperand &MO = Root.getOperand(1);
7372 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7373 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7374 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7378 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7379 Patterns.push_back(Pattern);
7380 return true;
7381 }
7382 return false;
7383 };
7384
7385 switch (Opc) {
7386 default:
7387 break;
7388 case AArch64::FNEGDr:
7389 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7390 case AArch64::FNEGSr:
7391 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7392 }
7393
7394 return false;
7395}
7396
7397/// Return true when a code sequence can improve throughput. It
7398/// should be called only for instructions in loops.
7399/// \param Pattern - combiner pattern
7401 switch (Pattern) {
7402 default:
7403 break;
7509 return true;
7510 } // end switch (Pattern)
7511 return false;
7512}
7513
7514/// Find other MI combine patterns.
7515static bool getMiscPatterns(MachineInstr &Root,
7516 SmallVectorImpl<unsigned> &Patterns) {
7517 // A - (B + C) ==> (A - B) - C or (A - C) - B
7518 unsigned Opc = Root.getOpcode();
7519 MachineBasicBlock &MBB = *Root.getParent();
7520
7521 switch (Opc) {
7522 case AArch64::SUBWrr:
7523 case AArch64::SUBSWrr:
7524 case AArch64::SUBXrr:
7525 case AArch64::SUBSXrr:
7526 // Found candidate root.
7527 break;
7528 default:
7529 return false;
7530 }
7531
7533 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7534 -1)
7535 return false;
7536
7537 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7538 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7539 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7540 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7543 return true;
7544 }
7545
7546 return false;
7547}
7548
7549/// Check if the given instruction forms a gather load pattern that can be
7550/// optimized for better Memory-Level Parallelism (MLP). This function
7551/// identifies chains of NEON lane load instructions that load data from
7552/// different memory addresses into individual lanes of a 128-bit vector
7553/// register, then attempts to split the pattern into parallel loads to break
7554/// the serial dependency between instructions.
7555///
7556/// Pattern Matched:
7557/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7558/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7559///
7560/// Transformed Into:
7561/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7562/// to combine the results, enabling better memory-level parallelism.
7563///
7564/// Supported Element Types:
7565/// - 32-bit elements (LD1i32, 4 lanes total)
7566/// - 16-bit elements (LD1i16, 8 lanes total)
7567/// - 8-bit elements (LD1i8, 16 lanes total)
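///
/// Illustrative shape of the matched chain for 32-bit elements (virtual
/// register names are invented for exposition only):
///   %lane0           = <32-bit scalar load>
///   %v0:fpr128       = SUBREG_TO_REG 0, %lane0, ssub
///   %v1:fpr128       = LD1i32 %v0, 1, %ptr1
///   %v2:fpr128       = LD1i32 %v1, 2, %ptr2
///   %v3:fpr128       = LD1i32 %v2, 3, %ptr3   <- Root (last lane)
/// which is rewritten into two independent half-width chains whose results
/// are combined with ZIP1v2i64.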
7568static bool getGatherLanePattern(MachineInstr &Root,
7569 SmallVectorImpl<unsigned> &Patterns,
7570 unsigned LoadLaneOpCode, unsigned NumLanes) {
7571 const MachineFunction *MF = Root.getMF();
7572
7573 // Early exit if optimizing for size.
7574 if (MF->getFunction().hasMinSize())
7575 return false;
7576
7577 const MachineRegisterInfo &MRI = MF->getRegInfo();
7578 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7579
7580 // The root of the pattern must load into the last lane of the vector.
7581 if (Root.getOperand(2).getImm() != NumLanes - 1)
7582 return false;
7583
7584 // Check that we have a load into every lane except lane 0.
7585 // For each load we also want to check that:
7586 // 1. It has a single non-debug use (since we will be replacing the virtual
7587 // register)
7588 // 2. That the addressing mode only uses a single pointer operand
7589 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7590 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7591 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7592 SmallVector<MachineInstr *, 16> LoadInstrs;
7593 while (!RemainingLanes.empty() && CurrInstr &&
7594 CurrInstr->getOpcode() == LoadLaneOpCode &&
7595 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7596 CurrInstr->getNumOperands() == 4) {
7597 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7598 LoadInstrs.push_back(CurrInstr);
7599 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7600 }
7601
7602 // Check that we have found a match for lanes N-1.. 1.
7603 if (!RemainingLanes.empty())
7604 return false;
7605
7606 // Match the SUBREG_TO_REG sequence.
7607 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7608 return false;
7609
7610 // Verify that the subreg to reg loads an integer into the first lane.
7611 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7612 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7613 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7614 return false;
7615
7616 // Verify that it also has a single non debug use.
7617 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7618 return false;
7619
7620 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7621
7622 // If there is any chance of aliasing, do not apply the pattern.
7623 // Walk backward through the MBB starting from Root.
7624 // Exit early if we've encountered all load instructions or hit the search
7625 // limit.
7626 auto MBBItr = Root.getIterator();
7627 unsigned RemainingSteps = GatherOptSearchLimit;
7628 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7629 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7630 const MachineBasicBlock *MBB = Root.getParent();
7631
7632 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7633 !RemainingLoadInstrs.empty();
7634 --MBBItr, --RemainingSteps) {
7635 const MachineInstr &CurrInstr = *MBBItr;
7636
7637 // Remove this instruction from remaining loads if it's one we're tracking.
7638 RemainingLoadInstrs.erase(&CurrInstr);
7639
7640 // Check for potential aliasing with any of the load instructions to
7641 // optimize.
7642 if (CurrInstr.isLoadFoldBarrier())
7643 return false;
7644 }
7645
7646 // If we hit the search limit without finding all load instructions,
7647 // don't match the pattern.
7648 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7649 return false;
7650
7651 switch (NumLanes) {
7652 case 4:
7654 break;
7655 case 8:
7657 break;
7658 case 16:
7660 break;
7661 default:
7662 llvm_unreachable("Got bad number of lanes for gather pattern.");
7663 }
7664
7665 return true;
7666}
7667
7668/// Search for patterns of LD instructions we can optimize.
7669static bool getLoadPatterns(MachineInstr &Root,
7670 SmallVectorImpl<unsigned> &Patterns) {
7671
7672 // The pattern searches for loads into single lanes.
7673 switch (Root.getOpcode()) {
7674 case AArch64::LD1i32:
7675 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7676 case AArch64::LD1i16:
7677 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7678 case AArch64::LD1i8:
7679 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7680 default:
7681 return false;
7682 }
7683}
7684
7685/// Generate optimized instruction sequence for gather load patterns to improve
7686/// Memory-Level Parallelism (MLP). This function transforms a chain of
7687/// sequential NEON lane loads into parallel vector loads that can execute
7688/// concurrently.
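///
/// Rough sketch of the emitted sequence for NumLanes == 4 (register names
/// are invented; see the code below for the exact operands):
///   reg0: reuse the existing SUBREG_TO_REG of lane 0, then LD1i32 lane 1
///   reg1: LDRSui of the middle pointer, SUBREG_TO_REG, then LD1i32 lane 1
///   result = ZIP1v2i64 reg0, reg1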
7689static void
7693 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7694 unsigned Pattern, unsigned NumLanes) {
7695 MachineFunction &MF = *Root.getParent()->getParent();
7696 MachineRegisterInfo &MRI = MF.getRegInfo();
7697 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7698
7699 // Gather the initial load instructions to build the pattern.
7700 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7701 MachineInstr *CurrInstr = &Root;
7702 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7703 LoadToLaneInstrs.push_back(CurrInstr);
7704 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7705 }
7706
7707 // Sort the load instructions according to the lane.
7708 llvm::sort(LoadToLaneInstrs,
7709 [](const MachineInstr *A, const MachineInstr *B) {
7710 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7711 });
7712
7713 MachineInstr *SubregToReg = CurrInstr;
7714 LoadToLaneInstrs.push_back(
7715 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7716 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7717
7718 const TargetRegisterClass *FPR128RegClass =
7719 MRI.getRegClass(Root.getOperand(0).getReg());
7720
7721 // Helper lambda to create a LD1 instruction.
7722 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7723 Register SrcRegister, unsigned Lane,
7724 Register OffsetRegister,
7725 bool OffsetRegisterKillState) {
7726 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7727 MachineInstrBuilder LoadIndexIntoRegister =
7728 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7729 NewRegister)
7730 .addReg(SrcRegister)
7731 .addImm(Lane)
7732 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7733 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7734 InsInstrs.push_back(LoadIndexIntoRegister);
7735 return NewRegister;
7736 };
7737
7738 // Helper to create load instruction based on the NumLanes in the NEON
7739 // register we are rewriting.
7740 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7741 Register OffsetReg,
7742 bool KillState) -> MachineInstrBuilder {
7743 unsigned Opcode;
7744 switch (NumLanes) {
7745 case 4:
7746 Opcode = AArch64::LDRSui;
7747 break;
7748 case 8:
7749 Opcode = AArch64::LDRHui;
7750 break;
7751 case 16:
7752 Opcode = AArch64::LDRBui;
7753 break;
7754 default:
7755 llvm_unreachable(
7756 "Got unsupported number of lanes in machine-combiner gather pattern");
7757 }
7758 // Immediate offset load
7759 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7760 .addReg(OffsetReg)
7761 .addImm(0);
7762 };
7763
7764 // Load the remaining lanes into register 0.
7765 auto LanesToLoadToReg0 =
7766 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7767 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7768 Register PrevReg = SubregToReg->getOperand(0).getReg();
7769 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7770 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7771 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7772 OffsetRegOperand.getReg(),
7773 OffsetRegOperand.isKill());
7774 DelInstrs.push_back(LoadInstr);
7775 }
7776 Register LastLoadReg0 = PrevReg;
7777
7778 // First load into register 1. Perform an integer load to zero out the upper
7779 // lanes in a single instruction.
7780 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7781 MachineInstr *OriginalSplitLoad =
7782 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7783 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7784 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7785
7786 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7787 OriginalSplitLoad->getOperand(3);
7788 MachineInstrBuilder MiddleIndexLoadInstr =
7789 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7790 OriginalSplitToLoadOffsetOperand.getReg(),
7791 OriginalSplitToLoadOffsetOperand.isKill());
7792
7793 InstrIdxForVirtReg.insert(
7794 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7795 InsInstrs.push_back(MiddleIndexLoadInstr);
7796 DelInstrs.push_back(OriginalSplitLoad);
7797
7798 // Subreg To Reg instruction for register 1.
7799 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7800 unsigned SubregType;
7801 switch (NumLanes) {
7802 case 4:
7803 SubregType = AArch64::ssub;
7804 break;
7805 case 8:
7806 SubregType = AArch64::hsub;
7807 break;
7808 case 16:
7809 SubregType = AArch64::bsub;
7810 break;
7811 default:
7812 llvm_unreachable(
7813 "Got invalid NumLanes for machine-combiner gather pattern");
7814 }
7815
7816 auto SubRegToRegInstr =
7817 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7818 DestRegForSubregToReg)
7819 .addImm(0)
7820 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7821 .addImm(SubregType);
7822 InstrIdxForVirtReg.insert(
7823 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7824 InsInstrs.push_back(SubRegToRegInstr);
7825
7826 // Load remaining lanes into register 1.
7827 auto LanesToLoadToReg1 =
7828 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7829 LoadToLaneInstrsAscending.end());
7830 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7831 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7832 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7833 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7834 OffsetRegOperand.getReg(),
7835 OffsetRegOperand.isKill());
7836
7837 // Do not add the last reg to DelInstrs - it will be removed later.
7838 if (Index == NumLanes / 2 - 2) {
7839 break;
7840 }
7841 DelInstrs.push_back(LoadInstr);
7842 }
7843 Register LastLoadReg1 = PrevReg;
7844
7845 // Create the final zip instruction to combine the results.
7846 MachineInstrBuilder ZipInstr =
7847 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7848 Root.getOperand(0).getReg())
7849 .addReg(LastLoadReg0)
7850 .addReg(LastLoadReg1);
7851 InsInstrs.push_back(ZipInstr);
7852}
7853
7867
7868/// Return true when there is potentially a faster code sequence for an
7869/// instruction chain ending in \p Root. All potential patterns are listed in
7870 /// the \p Patterns vector. Patterns should be sorted in priority order since the
7871/// pattern evaluator stops checking as soon as it finds a faster sequence.
7872
7873bool AArch64InstrInfo::getMachineCombinerPatterns(
7874 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7875 bool DoRegPressureReduce) const {
7876 // Integer patterns
7877 if (getMaddPatterns(Root, Patterns))
7878 return true;
7879 // Floating point patterns
7880 if (getFMULPatterns(Root, Patterns))
7881 return true;
7882 if (getFMAPatterns(Root, Patterns))
7883 return true;
7884 if (getFNEGPatterns(Root, Patterns))
7885 return true;
7886
7887 // Other patterns
7888 if (getMiscPatterns(Root, Patterns))
7889 return true;
7890
7891 // Load patterns
7892 if (getLoadPatterns(Root, Patterns))
7893 return true;
7894
7895 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7896 DoRegPressureReduce);
7897}
7898
7900/// genFusedMultiply - Generate fused multiply instructions.
7901/// This function supports both integer and floating point instructions.
7902/// A typical example:
7903/// F|MUL I=A,B,0
7904/// F|ADD R,I,C
7905/// ==> F|MADD R,A,B,C
7906/// \param MF Containing MachineFunction
7907/// \param MRI Register information
7908/// \param TII Target information
7909/// \param Root is the F|ADD instruction
7910/// \param [out] InsInstrs is a vector of machine instructions and will
7911/// contain the generated madd instruction
7912/// \param IdxMulOpd is index of operand in Root that is the result of
7913/// the F|MUL. In the example above IdxMulOpd is 1.
7914 /// \param MaddOpc the opcode of the f|madd instruction
7915 /// \param RC Register class of operands
7916 /// \param kind Kind of fma instruction (addressing mode) to be generated
7917/// \param ReplacedAddend is the result register from the instruction
7918/// replacing the non-combined operand, if any.
7919static MachineInstr *
7921 const TargetInstrInfo *TII, MachineInstr &Root,
7922 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7923 unsigned MaddOpc, const TargetRegisterClass *RC,
7924 FMAInstKind kind = FMAInstKind::Default,
7925 const Register *ReplacedAddend = nullptr) {
7926 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7927
7928 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7929 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7930 Register ResultReg = Root.getOperand(0).getReg();
7931 Register SrcReg0 = MUL->getOperand(1).getReg();
7932 bool Src0IsKill = MUL->getOperand(1).isKill();
7933 Register SrcReg1 = MUL->getOperand(2).getReg();
7934 bool Src1IsKill = MUL->getOperand(2).isKill();
7935
7936 Register SrcReg2;
7937 bool Src2IsKill;
7938 if (ReplacedAddend) {
7939 // If we just generated a new addend, we must be its only use.
7940 SrcReg2 = *ReplacedAddend;
7941 Src2IsKill = true;
7942 } else {
7943 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7944 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7945 }
7946
7947 if (ResultReg.isVirtual())
7948 MRI.constrainRegClass(ResultReg, RC);
7949 if (SrcReg0.isVirtual())
7950 MRI.constrainRegClass(SrcReg0, RC);
7951 if (SrcReg1.isVirtual())
7952 MRI.constrainRegClass(SrcReg1, RC);
7953 if (SrcReg2.isVirtual())
7954 MRI.constrainRegClass(SrcReg2, RC);
7955
7957 if (kind == FMAInstKind::Default)
7958 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7959 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7960 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7961 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7962 else if (kind == FMAInstKind::Indexed)
7963 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7964 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7965 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7966 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7967 .addImm(MUL->getOperand(3).getImm());
7968 else if (kind == FMAInstKind::Accumulator)
7969 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7970 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7971 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7972 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7973 else
7974 assert(false && "Invalid FMA instruction kind \n");
7975 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
7976 InsInstrs.push_back(MIB);
7977 return MUL;
7978}
7979
7980static MachineInstr *
7982 const TargetInstrInfo *TII, MachineInstr &Root,
7984 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7985
7986 unsigned Opc = 0;
7987 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7988 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7989 Opc = AArch64::FNMADDSrrr;
7990 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7991 Opc = AArch64::FNMADDDrrr;
7992 else
7993 return nullptr;
7994
7995 Register ResultReg = Root.getOperand(0).getReg();
7996 Register SrcReg0 = MAD->getOperand(1).getReg();
7997 Register SrcReg1 = MAD->getOperand(2).getReg();
7998 Register SrcReg2 = MAD->getOperand(3).getReg();
7999 bool Src0IsKill = MAD->getOperand(1).isKill();
8000 bool Src1IsKill = MAD->getOperand(2).isKill();
8001 bool Src2IsKill = MAD->getOperand(3).isKill();
8002 if (ResultReg.isVirtual())
8003 MRI.constrainRegClass(ResultReg, RC);
8004 if (SrcReg0.isVirtual())
8005 MRI.constrainRegClass(SrcReg0, RC);
8006 if (SrcReg1.isVirtual())
8007 MRI.constrainRegClass(SrcReg1, RC);
8008 if (SrcReg2.isVirtual())
8009 MRI.constrainRegClass(SrcReg2, RC);
8010
8011 MachineInstrBuilder MIB =
8012 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8013 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8014 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8015 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8016 InsInstrs.push_back(MIB);
8017
8018 return MAD;
8019}
8020
8021/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8022static MachineInstr *
8025 unsigned IdxDupOp, unsigned MulOpc,
8027 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8028 "Invalid index of FMUL operand");
8029
8030 MachineFunction &MF = *Root.getMF();
8031 MachineRegisterInfo &MRI = MF.getRegInfo();
8032
8033 MachineInstr *Dup =
8034 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8035
8036 if (Dup->getOpcode() == TargetOpcode::COPY)
8037 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8038
8039 Register DupSrcReg = Dup->getOperand(1).getReg();
8040 MRI.clearKillFlags(DupSrcReg);
8041 MRI.constrainRegClass(DupSrcReg, RC);
8042
8043 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8044
8045 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8046 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8047
8048 Register ResultReg = Root.getOperand(0).getReg();
8049
8051 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8052 .add(MulOp)
8053 .addReg(DupSrcReg)
8054 .addImm(DupSrcLane);
8055
8056 InsInstrs.push_back(MIB);
8057 return &Root;
8058}
8059
8060/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8061/// instructions.
8062///
8063/// \see genFusedMultiply
8067 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8068 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8069 FMAInstKind::Accumulator);
8070}
8071
8072/// genNeg - Helper to generate an intermediate negation of the second operand
8073/// of Root
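/// (used, for instance, by the MULSUB*_OP1 patterns below: sub(mul(a, b), c)
/// is rewritten as mla(neg(c), a, b))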
8074static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8075 const TargetInstrInfo *TII, MachineInstr &Root,
8076 SmallVectorImpl<MachineInstr *> &InsInstrs,
8077 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8078 unsigned MnegOpc, const TargetRegisterClass *RC) {
8079 Register NewVR = MRI.createVirtualRegister(RC);
8081 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8082 .add(Root.getOperand(2));
8083 InsInstrs.push_back(MIB);
8084
8085 assert(InstrIdxForVirtReg.empty());
8086 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8087
8088 return NewVR;
8089}
8090
8091/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8092/// instructions with an additional negation of the accumulator
8096 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8097 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8098 assert(IdxMulOpd == 1);
8099
8100 Register NewVR =
8101 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8102 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8103 FMAInstKind::Accumulator, &NewVR);
8104}
8105
8106/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8107/// instructions.
8108///
8109/// \see genFusedMultiply
8113 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8114 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8115 FMAInstKind::Indexed);
8116}
8117
8118/// genFusedMultiplyIdxNeg - Helper to generate lane-indexed fused multiply
8119/// accumulate instructions with an additional negation of the accumulator
8123 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8124 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8125 assert(IdxMulOpd == 1);
8126
8127 Register NewVR =
8128 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8129
8130 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8131 FMAInstKind::Indexed, &NewVR);
8132}
8133
8134/// genMaddR - Generate madd instruction and combine mul and add using
8135/// an extra virtual register
8136/// Example - an ADD intermediate needs to be stored in a register:
8137/// MUL I=A,B,0
8138/// ADD R,I,Imm
8139/// ==> ORR V, ZR, Imm
8140/// ==> MADD R,A,B,V
8141/// \param MF Containing MachineFunction
8142/// \param MRI Register information
8143/// \param TII Target information
8144/// \param Root is the ADD instruction
8145/// \param [out] InsInstrs is a vector of machine instructions and will
8146/// contain the generated madd instruction
8147/// \param IdxMulOpd is index of operand in Root that is the result of
8148/// the MUL. In the example above IdxMulOpd is 1.
8149/// \param MaddOpc the opcode of the madd instruction
8150/// \param VR is a virtual register that holds the value of an ADD operand
8151/// (V in the example above).
8152/// \param RC Register class of operands
8154 const TargetInstrInfo *TII, MachineInstr &Root,
8156 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8157 const TargetRegisterClass *RC) {
8158 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8159
8160 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8161 Register ResultReg = Root.getOperand(0).getReg();
8162 Register SrcReg0 = MUL->getOperand(1).getReg();
8163 bool Src0IsKill = MUL->getOperand(1).isKill();
8164 Register SrcReg1 = MUL->getOperand(2).getReg();
8165 bool Src1IsKill = MUL->getOperand(2).isKill();
8166
8167 if (ResultReg.isVirtual())
8168 MRI.constrainRegClass(ResultReg, RC);
8169 if (SrcReg0.isVirtual())
8170 MRI.constrainRegClass(SrcReg0, RC);
8171 if (SrcReg1.isVirtual())
8172 MRI.constrainRegClass(SrcReg1, RC);
8174 MRI.constrainRegClass(VR, RC);
8175
8177 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8178 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8179 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8180 .addReg(VR);
8181 // Insert the MADD
8182 InsInstrs.push_back(MIB);
8183 return MUL;
8184}
8185
8186/// Do the following transformation
8187/// A - (B + C) ==> (A - B) - C
8188/// A - (B + C) ==> (A - C) - B
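/// For example, with IdxOpd1 == 1 and invented virtual registers:
///   %3 = ADDWrr %1, %2
///   %4 = SUBWrr %0, %3
/// becomes
///   %5 = SUBWrr %0, %1
///   %4 = SUBWrr %5, %2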
8190 const TargetInstrInfo *TII, MachineInstr &Root,
8193 unsigned IdxOpd1,
8194 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8195 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8196 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8197 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8198
8199 Register ResultReg = Root.getOperand(0).getReg();
8200 Register RegA = Root.getOperand(1).getReg();
8201 bool RegAIsKill = Root.getOperand(1).isKill();
8202 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8203 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8204 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8205 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8206 Register NewVR =
8207 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8208
8209 unsigned Opcode = Root.getOpcode();
8210 if (Opcode == AArch64::SUBSWrr)
8211 Opcode = AArch64::SUBWrr;
8212 else if (Opcode == AArch64::SUBSXrr)
8213 Opcode = AArch64::SUBXrr;
8214 else
8215 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8216 "Unexpected instruction opcode.");
8217
8218 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8219 Flags &= ~MachineInstr::NoSWrap;
8220 Flags &= ~MachineInstr::NoUWrap;
8221
8222 MachineInstrBuilder MIB1 =
8223 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8224 .addReg(RegA, getKillRegState(RegAIsKill))
8225 .addReg(RegB, getKillRegState(RegBIsKill))
8226 .setMIFlags(Flags);
8227 MachineInstrBuilder MIB2 =
8228 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8229 .addReg(NewVR, getKillRegState(true))
8230 .addReg(RegC, getKillRegState(RegCIsKill))
8231 .setMIFlags(Flags);
8232
8233 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8234 InsInstrs.push_back(MIB1);
8235 InsInstrs.push_back(MIB2);
8236 DelInstrs.push_back(AddMI);
8237 DelInstrs.push_back(&Root);
8238}
8239
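/// Return the plain vector (or SVE) ADD opcode whose element type matches the
/// accumulator of \p AccumulatorOpCode; it is used to sum the partial
/// accumulators once an accumulation chain has been split into parallel
/// sub-chains.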
8240unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8241 unsigned int AccumulatorOpCode) const {
8242 switch (AccumulatorOpCode) {
8243 case AArch64::UABALB_ZZZ_D:
8244 case AArch64::SABALB_ZZZ_D:
8245 case AArch64::UABALT_ZZZ_D:
8246 case AArch64::SABALT_ZZZ_D:
8247 return AArch64::ADD_ZZZ_D;
8248 case AArch64::UABALB_ZZZ_H:
8249 case AArch64::SABALB_ZZZ_H:
8250 case AArch64::UABALT_ZZZ_H:
8251 case AArch64::SABALT_ZZZ_H:
8252 return AArch64::ADD_ZZZ_H;
8253 case AArch64::UABALB_ZZZ_S:
8254 case AArch64::SABALB_ZZZ_S:
8255 case AArch64::UABALT_ZZZ_S:
8256 case AArch64::SABALT_ZZZ_S:
8257 return AArch64::ADD_ZZZ_S;
8258 case AArch64::UABALv16i8_v8i16:
8259 case AArch64::SABALv8i8_v8i16:
8260 case AArch64::SABAv8i16:
8261 case AArch64::UABAv8i16:
8262 return AArch64::ADDv8i16;
8263 case AArch64::SABALv2i32_v2i64:
8264 case AArch64::UABALv2i32_v2i64:
8265 case AArch64::SABALv4i32_v2i64:
8266 return AArch64::ADDv2i64;
8267 case AArch64::UABALv4i16_v4i32:
8268 case AArch64::SABALv4i16_v4i32:
8269 case AArch64::SABALv8i16_v4i32:
8270 case AArch64::SABAv4i32:
8271 case AArch64::UABAv4i32:
8272 return AArch64::ADDv4i32;
8273 case AArch64::UABALv4i32_v2i64:
8274 return AArch64::ADDv2i64;
8275 case AArch64::UABALv8i16_v4i32:
8276 return AArch64::ADDv4i32;
8277 case AArch64::UABALv8i8_v8i16:
8278 case AArch64::SABALv16i8_v8i16:
8279 return AArch64::ADDv8i16;
8280 case AArch64::UABAv16i8:
8281 case AArch64::SABAv16i8:
8282 return AArch64::ADDv16i8;
8283 case AArch64::UABAv4i16:
8284 case AArch64::SABAv4i16:
8285 return AArch64::ADDv4i16;
8286 case AArch64::UABAv2i32:
8287 case AArch64::SABAv2i32:
8288 return AArch64::ADDv2i32;
8289 case AArch64::UABAv8i8:
8290 case AArch64::SABAv8i8:
8291 return AArch64::ADDv8i8;
8292 default:
8293 llvm_unreachable("Unknown accumulator opcode");
8294 }
8295}
8296
8297/// When getMachineCombinerPatterns() finds potential patterns,
8298/// this function generates the instructions that could replace the
8299/// original code sequence
8300void AArch64InstrInfo::genAlternativeCodeSequence(
8301 MachineInstr &Root, unsigned Pattern,
8304 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8305 MachineBasicBlock &MBB = *Root.getParent();
8306 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8307 MachineFunction &MF = *MBB.getParent();
8308 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8309
8310 MachineInstr *MUL = nullptr;
8311 const TargetRegisterClass *RC;
8312 unsigned Opc;
8313 switch (Pattern) {
8314 default:
8315 // Reassociate instructions.
8316 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8317 DelInstrs, InstrIdxForVirtReg);
8318 return;
8320 // A - (B + C)
8321 // ==> (A - B) - C
8322 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8323 InstrIdxForVirtReg);
8324 return;
8326 // A - (B + C)
8327 // ==> (A - C) - B
8328 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8329 InstrIdxForVirtReg);
8330 return;
8333 // MUL I=A,B,0
8334 // ADD R,I,C
8335 // ==> MADD R,A,B,C
8336 // --- Create(MADD);
8338 Opc = AArch64::MADDWrrr;
8339 RC = &AArch64::GPR32RegClass;
8340 } else {
8341 Opc = AArch64::MADDXrrr;
8342 RC = &AArch64::GPR64RegClass;
8343 }
8344 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8345 break;
8348 // MUL I=A,B,0
8349 // ADD R,C,I
8350 // ==> MADD R,A,B,C
8351 // --- Create(MADD);
8353 Opc = AArch64::MADDWrrr;
8354 RC = &AArch64::GPR32RegClass;
8355 } else {
8356 Opc = AArch64::MADDXrrr;
8357 RC = &AArch64::GPR64RegClass;
8358 }
8359 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8360 break;
8365 // MUL I=A,B,0
8366 // ADD/SUB R,I,Imm
8367 // ==> MOV V, Imm/-Imm
8368 // ==> MADD R,A,B,V
8369 // --- Create(MADD);
8370 const TargetRegisterClass *RC;
8371 unsigned BitSize, MovImm;
8374 MovImm = AArch64::MOVi32imm;
8375 RC = &AArch64::GPR32spRegClass;
8376 BitSize = 32;
8377 Opc = AArch64::MADDWrrr;
8378 RC = &AArch64::GPR32RegClass;
8379 } else {
8380 MovImm = AArch64::MOVi64imm;
8381 RC = &AArch64::GPR64spRegClass;
8382 BitSize = 64;
8383 Opc = AArch64::MADDXrrr;
8384 RC = &AArch64::GPR64RegClass;
8385 }
8386 Register NewVR = MRI.createVirtualRegister(RC);
8387 uint64_t Imm = Root.getOperand(2).getImm();
8388
8389 if (Root.getOperand(3).isImm()) {
8390 unsigned Val = Root.getOperand(3).getImm();
8391 Imm = Imm << Val;
8392 }
8393 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8394 Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
8395 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8396 // Check that the immediate can be composed via a single instruction.
8397 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8398 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8399 if (Insn.size() != 1)
8400 return;
8401 MachineInstrBuilder MIB1 =
8402 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8403 .addImm(IsSub ? -Imm : Imm);
8404 InsInstrs.push_back(MIB1);
8405 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8406 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8407 break;
8408 }
8411 // MUL I=A,B,0
8412 // SUB R,I, C
8413 // ==> SUB V, 0, C
8414 // ==> MADD R,A,B,V // = -C + A*B
8415 // --- Create(MADD);
8416 const TargetRegisterClass *SubRC;
8417 unsigned SubOpc, ZeroReg;
8419 SubOpc = AArch64::SUBWrr;
8420 SubRC = &AArch64::GPR32spRegClass;
8421 ZeroReg = AArch64::WZR;
8422 Opc = AArch64::MADDWrrr;
8423 RC = &AArch64::GPR32RegClass;
8424 } else {
8425 SubOpc = AArch64::SUBXrr;
8426 SubRC = &AArch64::GPR64spRegClass;
8427 ZeroReg = AArch64::XZR;
8428 Opc = AArch64::MADDXrrr;
8429 RC = &AArch64::GPR64RegClass;
8430 }
8431 Register NewVR = MRI.createVirtualRegister(SubRC);
8432 // SUB NewVR, 0, C
8433 MachineInstrBuilder MIB1 =
8434 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8435 .addReg(ZeroReg)
8436 .add(Root.getOperand(2));
8437 InsInstrs.push_back(MIB1);
8438 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8439 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8440 break;
8441 }
8444 // MUL I=A,B,0
8445 // SUB R,C,I
8446 // ==> MSUB R,A,B,C (computes C - A*B)
8447 // --- Create(MSUB);
8449 Opc = AArch64::MSUBWrrr;
8450 RC = &AArch64::GPR32RegClass;
8451 } else {
8452 Opc = AArch64::MSUBXrrr;
8453 RC = &AArch64::GPR64RegClass;
8454 }
8455 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8456 break;
8458 Opc = AArch64::MLAv8i8;
8459 RC = &AArch64::FPR64RegClass;
8460 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8461 break;
8463 Opc = AArch64::MLAv8i8;
8464 RC = &AArch64::FPR64RegClass;
8465 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8466 break;
8468 Opc = AArch64::MLAv16i8;
8469 RC = &AArch64::FPR128RegClass;
8470 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8471 break;
8473 Opc = AArch64::MLAv16i8;
8474 RC = &AArch64::FPR128RegClass;
8475 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8476 break;
8478 Opc = AArch64::MLAv4i16;
8479 RC = &AArch64::FPR64RegClass;
8480 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8481 break;
8483 Opc = AArch64::MLAv4i16;
8484 RC = &AArch64::FPR64RegClass;
8485 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8486 break;
8488 Opc = AArch64::MLAv8i16;
8489 RC = &AArch64::FPR128RegClass;
8490 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8491 break;
8493 Opc = AArch64::MLAv8i16;
8494 RC = &AArch64::FPR128RegClass;
8495 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8496 break;
8498 Opc = AArch64::MLAv2i32;
8499 RC = &AArch64::FPR64RegClass;
8500 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8501 break;
8503 Opc = AArch64::MLAv2i32;
8504 RC = &AArch64::FPR64RegClass;
8505 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8506 break;
8508 Opc = AArch64::MLAv4i32;
8509 RC = &AArch64::FPR128RegClass;
8510 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8511 break;
8513 Opc = AArch64::MLAv4i32;
8514 RC = &AArch64::FPR128RegClass;
8515 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8516 break;
8517
8519 Opc = AArch64::MLAv8i8;
8520 RC = &AArch64::FPR64RegClass;
8521 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8522 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8523 RC);
8524 break;
8526 Opc = AArch64::MLSv8i8;
8527 RC = &AArch64::FPR64RegClass;
8528 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8529 break;
8531 Opc = AArch64::MLAv16i8;
8532 RC = &AArch64::FPR128RegClass;
8533 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8534 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8535 RC);
8536 break;
8538 Opc = AArch64::MLSv16i8;
8539 RC = &AArch64::FPR128RegClass;
8540 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8541 break;
8543 Opc = AArch64::MLAv4i16;
8544 RC = &AArch64::FPR64RegClass;
8545 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8546 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8547 RC);
8548 break;
8550 Opc = AArch64::MLSv4i16;
8551 RC = &AArch64::FPR64RegClass;
8552 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8553 break;
8555 Opc = AArch64::MLAv8i16;
8556 RC = &AArch64::FPR128RegClass;
8557 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8558 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8559 RC);
8560 break;
8562 Opc = AArch64::MLSv8i16;
8563 RC = &AArch64::FPR128RegClass;
8564 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8565 break;
8567 Opc = AArch64::MLAv2i32;
8568 RC = &AArch64::FPR64RegClass;
8569 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8570 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8571 RC);
8572 break;
8574 Opc = AArch64::MLSv2i32;
8575 RC = &AArch64::FPR64RegClass;
8576 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8577 break;
8579 Opc = AArch64::MLAv4i32;
8580 RC = &AArch64::FPR128RegClass;
8581 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8582 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8583 RC);
8584 break;
8586 Opc = AArch64::MLSv4i32;
8587 RC = &AArch64::FPR128RegClass;
8588 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8589 break;
8590
8592 Opc = AArch64::MLAv4i16_indexed;
8593 RC = &AArch64::FPR64RegClass;
8594 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8595 break;
8597 Opc = AArch64::MLAv4i16_indexed;
8598 RC = &AArch64::FPR64RegClass;
8599 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8600 break;
8602 Opc = AArch64::MLAv8i16_indexed;
8603 RC = &AArch64::FPR128RegClass;
8604 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8605 break;
8607 Opc = AArch64::MLAv8i16_indexed;
8608 RC = &AArch64::FPR128RegClass;
8609 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8610 break;
8612 Opc = AArch64::MLAv2i32_indexed;
8613 RC = &AArch64::FPR64RegClass;
8614 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8615 break;
8617 Opc = AArch64::MLAv2i32_indexed;
8618 RC = &AArch64::FPR64RegClass;
8619 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8620 break;
8622 Opc = AArch64::MLAv4i32_indexed;
8623 RC = &AArch64::FPR128RegClass;
8624 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8625 break;
8627 Opc = AArch64::MLAv4i32_indexed;
8628 RC = &AArch64::FPR128RegClass;
8629 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8630 break;
8631
8633 Opc = AArch64::MLAv4i16_indexed;
8634 RC = &AArch64::FPR64RegClass;
8635 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8636 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8637 RC);
8638 break;
8640 Opc = AArch64::MLSv4i16_indexed;
8641 RC = &AArch64::FPR64RegClass;
8642 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8643 break;
8645 Opc = AArch64::MLAv8i16_indexed;
8646 RC = &AArch64::FPR128RegClass;
8647 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8648 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8649 RC);
8650 break;
8652 Opc = AArch64::MLSv8i16_indexed;
8653 RC = &AArch64::FPR128RegClass;
8654 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8655 break;
8657 Opc = AArch64::MLAv2i32_indexed;
8658 RC = &AArch64::FPR64RegClass;
8659 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8660 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8661 RC);
8662 break;
8664 Opc = AArch64::MLSv2i32_indexed;
8665 RC = &AArch64::FPR64RegClass;
8666 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8667 break;
8669 Opc = AArch64::MLAv4i32_indexed;
8670 RC = &AArch64::FPR128RegClass;
8671 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8672 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8673 RC);
8674 break;
8676 Opc = AArch64::MLSv4i32_indexed;
8677 RC = &AArch64::FPR128RegClass;
8678 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8679 break;
8680
8681 // Floating Point Support
8683 Opc = AArch64::FMADDHrrr;
8684 RC = &AArch64::FPR16RegClass;
8685 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8686 break;
8688 Opc = AArch64::FMADDSrrr;
8689 RC = &AArch64::FPR32RegClass;
8690 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8691 break;
8693 Opc = AArch64::FMADDDrrr;
8694 RC = &AArch64::FPR64RegClass;
8695 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8696 break;
8697
8699 Opc = AArch64::FMADDHrrr;
8700 RC = &AArch64::FPR16RegClass;
8701 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8702 break;
8704 Opc = AArch64::FMADDSrrr;
8705 RC = &AArch64::FPR32RegClass;
8706 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8707 break;
8709 Opc = AArch64::FMADDDrrr;
8710 RC = &AArch64::FPR64RegClass;
8711 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8712 break;
8713
8715 Opc = AArch64::FMLAv1i32_indexed;
8716 RC = &AArch64::FPR32RegClass;
8717 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8719 break;
8721 Opc = AArch64::FMLAv1i32_indexed;
8722 RC = &AArch64::FPR32RegClass;
8723 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8725 break;
8726
8728 Opc = AArch64::FMLAv1i64_indexed;
8729 RC = &AArch64::FPR64RegClass;
8730 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8732 break;
8734 Opc = AArch64::FMLAv1i64_indexed;
8735 RC = &AArch64::FPR64RegClass;
8736 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8738 break;
8739
8741 RC = &AArch64::FPR64RegClass;
8742 Opc = AArch64::FMLAv4i16_indexed;
8743 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8745 break;
8747 RC = &AArch64::FPR64RegClass;
8748 Opc = AArch64::FMLAv4f16;
8749 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8751 break;
8753 RC = &AArch64::FPR64RegClass;
8754 Opc = AArch64::FMLAv4i16_indexed;
8755 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8757 break;
8759 RC = &AArch64::FPR64RegClass;
8760 Opc = AArch64::FMLAv4f16;
8761 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8763 break;
8764
8767 RC = &AArch64::FPR64RegClass;
8769 Opc = AArch64::FMLAv2i32_indexed;
8770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8772 } else {
8773 Opc = AArch64::FMLAv2f32;
8774 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8776 }
8777 break;
8780 RC = &AArch64::FPR64RegClass;
8782 Opc = AArch64::FMLAv2i32_indexed;
8783 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8785 } else {
8786 Opc = AArch64::FMLAv2f32;
8787 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8789 }
8790 break;
8791
8793 RC = &AArch64::FPR128RegClass;
8794 Opc = AArch64::FMLAv8i16_indexed;
8795 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8797 break;
8799 RC = &AArch64::FPR128RegClass;
8800 Opc = AArch64::FMLAv8f16;
8801 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8803 break;
8805 RC = &AArch64::FPR128RegClass;
8806 Opc = AArch64::FMLAv8i16_indexed;
8807 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8809 break;
8811 RC = &AArch64::FPR128RegClass;
8812 Opc = AArch64::FMLAv8f16;
8813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8815 break;
8816
8819 RC = &AArch64::FPR128RegClass;
8821 Opc = AArch64::FMLAv2i64_indexed;
8822 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8824 } else {
8825 Opc = AArch64::FMLAv2f64;
8826 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8828 }
8829 break;
8832 RC = &AArch64::FPR128RegClass;
8834 Opc = AArch64::FMLAv2i64_indexed;
8835 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8837 } else {
8838 Opc = AArch64::FMLAv2f64;
8839 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8841 }
8842 break;
8843
8846 RC = &AArch64::FPR128RegClass;
8848 Opc = AArch64::FMLAv4i32_indexed;
8849 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8851 } else {
8852 Opc = AArch64::FMLAv4f32;
8853 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8855 }
8856 break;
8857
8860 RC = &AArch64::FPR128RegClass;
8862 Opc = AArch64::FMLAv4i32_indexed;
8863 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8865 } else {
8866 Opc = AArch64::FMLAv4f32;
8867 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8869 }
8870 break;
8871
8873 Opc = AArch64::FNMSUBHrrr;
8874 RC = &AArch64::FPR16RegClass;
8875 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8876 break;
8878 Opc = AArch64::FNMSUBSrrr;
8879 RC = &AArch64::FPR32RegClass;
8880 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8881 break;
8883 Opc = AArch64::FNMSUBDrrr;
8884 RC = &AArch64::FPR64RegClass;
8885 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8886 break;
8887
8889 Opc = AArch64::FNMADDHrrr;
8890 RC = &AArch64::FPR16RegClass;
8891 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8892 break;
8894 Opc = AArch64::FNMADDSrrr;
8895 RC = &AArch64::FPR32RegClass;
8896 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8897 break;
8899 Opc = AArch64::FNMADDDrrr;
8900 RC = &AArch64::FPR64RegClass;
8901 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8902 break;
8903
8905 Opc = AArch64::FMSUBHrrr;
8906 RC = &AArch64::FPR16RegClass;
8907 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8908 break;
8910 Opc = AArch64::FMSUBSrrr;
8911 RC = &AArch64::FPR32RegClass;
8912 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8913 break;
8915 Opc = AArch64::FMSUBDrrr;
8916 RC = &AArch64::FPR64RegClass;
8917 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8918 break;
8919
8921 Opc = AArch64::FMLSv1i32_indexed;
8922 RC = &AArch64::FPR32RegClass;
8923 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8925 break;
8926
8928 Opc = AArch64::FMLSv1i64_indexed;
8929 RC = &AArch64::FPR64RegClass;
8930 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8932 break;
8933
8936 RC = &AArch64::FPR64RegClass;
8937 Register NewVR = MRI.createVirtualRegister(RC);
8938 MachineInstrBuilder MIB1 =
8939 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8940 .add(Root.getOperand(2));
8941 InsInstrs.push_back(MIB1);
8942 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8944 Opc = AArch64::FMLAv4f16;
8945 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8946 FMAInstKind::Accumulator, &NewVR);
8947 } else {
8948 Opc = AArch64::FMLAv4i16_indexed;
8949 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8950 FMAInstKind::Indexed, &NewVR);
8951 }
8952 break;
8953 }
8955 RC = &AArch64::FPR64RegClass;
8956 Opc = AArch64::FMLSv4f16;
8957 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8958 FMAInstKind::Accumulator);
8959 break;
8961 RC = &AArch64::FPR64RegClass;
8962 Opc = AArch64::FMLSv4i16_indexed;
8963 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8964 FMAInstKind::Indexed);
8965 break;
8966
8969 RC = &AArch64::FPR64RegClass;
8971 Opc = AArch64::FMLSv2i32_indexed;
8972 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8974 } else {
8975 Opc = AArch64::FMLSv2f32;
8976 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8978 }
8979 break;
8980
8983 RC = &AArch64::FPR128RegClass;
8984 Register NewVR = MRI.createVirtualRegister(RC);
8985 MachineInstrBuilder MIB1 =
8986 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8987 .add(Root.getOperand(2));
8988 InsInstrs.push_back(MIB1);
8989 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8991 Opc = AArch64::FMLAv8f16;
8992 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8993 FMAInstKind::Accumulator, &NewVR);
8994 } else {
8995 Opc = AArch64::FMLAv8i16_indexed;
8996 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8997 FMAInstKind::Indexed, &NewVR);
8998 }
8999 break;
9000 }
9002 RC = &AArch64::FPR128RegClass;
9003 Opc = AArch64::FMLSv8f16;
9004 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9005 FMAInstKind::Accumulator);
9006 break;
9008 RC = &AArch64::FPR128RegClass;
9009 Opc = AArch64::FMLSv8i16_indexed;
9010 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9011 FMAInstKind::Indexed);
9012 break;
9013
9016 RC = &AArch64::FPR128RegClass;
9018 Opc = AArch64::FMLSv2i64_indexed;
9019 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9021 } else {
9022 Opc = AArch64::FMLSv2f64;
9023 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9025 }
9026 break;
9027
9030 RC = &AArch64::FPR128RegClass;
9032 Opc = AArch64::FMLSv4i32_indexed;
9033 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9035 } else {
9036 Opc = AArch64::FMLSv4f32;
9037 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9039 }
9040 break;
9043 RC = &AArch64::FPR64RegClass;
9044 Register NewVR = MRI.createVirtualRegister(RC);
9045 MachineInstrBuilder MIB1 =
9046 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9047 .add(Root.getOperand(2));
9048 InsInstrs.push_back(MIB1);
9049 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9051 Opc = AArch64::FMLAv2i32_indexed;
9052 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9053 FMAInstKind::Indexed, &NewVR);
9054 } else {
9055 Opc = AArch64::FMLAv2f32;
9056 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9057 FMAInstKind::Accumulator, &NewVR);
9058 }
9059 break;
9060 }
9063 RC = &AArch64::FPR128RegClass;
9064 Register NewVR = MRI.createVirtualRegister(RC);
9065 MachineInstrBuilder MIB1 =
9066 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9067 .add(Root.getOperand(2));
9068 InsInstrs.push_back(MIB1);
9069 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9071 Opc = AArch64::FMLAv4i32_indexed;
9072 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9073 FMAInstKind::Indexed, &NewVR);
9074 } else {
9075 Opc = AArch64::FMLAv4f32;
9076 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9077 FMAInstKind::Accumulator, &NewVR);
9078 }
9079 break;
9080 }
9083 RC = &AArch64::FPR128RegClass;
9084 Register NewVR = MRI.createVirtualRegister(RC);
9085 MachineInstrBuilder MIB1 =
9086 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9087 .add(Root.getOperand(2));
9088 InsInstrs.push_back(MIB1);
9089 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9091 Opc = AArch64::FMLAv2i64_indexed;
9092 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9093 FMAInstKind::Indexed, &NewVR);
9094 } else {
9095 Opc = AArch64::FMLAv2f64;
9096 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9097 FMAInstKind::Accumulator, &NewVR);
9098 }
9099 break;
9100 }
9103 unsigned IdxDupOp =
9104 (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
9105 : 2;
9106 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9107 &AArch64::FPR128RegClass, MRI);
9108 break;
9109 }
9112 unsigned IdxDupOp =
9113 (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
9114 : 2;
9115 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9116 &AArch64::FPR128RegClass, MRI);
9117 break;
9118 }
9121 unsigned IdxDupOp =
9122 (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
9123 : 2;
9124 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9125 &AArch64::FPR128_loRegClass, MRI);
9126 break;
9127 }
9130 unsigned IdxDupOp =
9131 (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
9132 : 2;
9133 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9134 &AArch64::FPR128RegClass, MRI);
9135 break;
9136 }
9139 unsigned IdxDupOp =
9140 (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
9141 : 2;
9142 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9143 &AArch64::FPR128_loRegClass, MRI);
9144 break;
9145 }
9147 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9148 break;
9149 }
9151 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9152 Pattern, 4);
9153 break;
9154 }
9156 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9157 Pattern, 8);
9158 break;
9159 }
9161 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9162 Pattern, 16);
9163 break;
9164 }
9165
9166 } // end switch (Pattern)
9167 // Record MUL and ADD/SUB for deletion
9168 if (MUL)
9169 DelInstrs.push_back(MUL);
9170 DelInstrs.push_back(&Root);
9171
9172 // Set the flags on the inserted instructions to be the merged flags of the
9173 // instructions that we have combined.
9174 uint32_t Flags = Root.getFlags();
9175 if (MUL)
9176 Flags = Root.mergeFlagsWith(*MUL);
9177 for (auto *MI : InsInstrs)
9178 MI->setFlags(Flags);
9179}
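// For illustration, one reassociation the switch above can produce (register
// numbers are illustrative, not taken from any real test):
//
//   fmul v8.2d, v9.2d, v10.2d
//   fadd v11.2d, v8.2d, v11.2d
//     -->
//   fmla v11.2d, v9.2d, v10.2d
//
// The multiply is recorded in MUL and queued for deletion together with the
// root add/sub, and the inserted instructions inherit the merged flags.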
9180
9181/// Replace csincr-branch sequence by simple conditional branch
9182///
9183/// Examples:
9184/// 1. \code
9185/// csinc w9, wzr, wzr, <condition code>
9186/// tbnz w9, #0, 0x44
9187/// \endcode
9188/// to
9189/// \code
9190/// b.<inverted condition code>
9191/// \endcode
9192///
9193/// 2. \code
9194/// csinc w9, wzr, wzr, <condition code>
9195/// tbz w9, #0, 0x44
9196/// \endcode
9197/// to
9198/// \code
9199/// b.<condition code>
9200/// \endcode
9201///
9202/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9203/// compare's constant operand is power of 2.
9204///
9205/// Examples:
9206/// \code
9207/// and w8, w8, #0x400
9208/// cbnz w8, L1
9209/// \endcode
9210/// to
9211/// \code
9212/// tbnz w8, #10, L1
9213/// \endcode
9214///
9215/// \param MI Conditional Branch
9216/// \return True when the simple conditional branch is generated
9217///
9218 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9219 bool IsNegativeBranch = false;
9220 bool IsTestAndBranch = false;
9221 unsigned TargetBBInMI = 0;
9222 switch (MI.getOpcode()) {
9223 default:
9224 llvm_unreachable("Unknown branch instruction?");
9225 case AArch64::Bcc:
9226 case AArch64::CBWPri:
9227 case AArch64::CBXPri:
9228 case AArch64::CBWPrr:
9229 case AArch64::CBXPrr:
9230 return false;
9231 case AArch64::CBZW:
9232 case AArch64::CBZX:
9233 TargetBBInMI = 1;
9234 break;
9235 case AArch64::CBNZW:
9236 case AArch64::CBNZX:
9237 TargetBBInMI = 1;
9238 IsNegativeBranch = true;
9239 break;
9240 case AArch64::TBZW:
9241 case AArch64::TBZX:
9242 TargetBBInMI = 2;
9243 IsTestAndBranch = true;
9244 break;
9245 case AArch64::TBNZW:
9246 case AArch64::TBNZX:
9247 TargetBBInMI = 2;
9248 IsNegativeBranch = true;
9249 IsTestAndBranch = true;
9250 break;
9251 }
9252 // So we increment a zero register and test for bits other
9253 // than bit 0? Conservatively bail out in case the verifier
9254 // missed this case.
9255 if (IsTestAndBranch && MI.getOperand(1).getImm())
9256 return false;
9257
9258 // Find Definition.
9259 assert(MI.getParent() && "Incomplete machine instruction\n");
9260 MachineBasicBlock *MBB = MI.getParent();
9261 MachineFunction *MF = MBB->getParent();
9262 MachineRegisterInfo *MRI = &MF->getRegInfo();
9263 Register VReg = MI.getOperand(0).getReg();
9264 if (!VReg.isVirtual())
9265 return false;
9266
9267 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9268
9269 // Look through COPY instructions to find definition.
9270 while (DefMI->isCopy()) {
9271 Register CopyVReg = DefMI->getOperand(1).getReg();
9272 if (!MRI->hasOneNonDBGUse(CopyVReg))
9273 return false;
9274 if (!MRI->hasOneDef(CopyVReg))
9275 return false;
9276 DefMI = MRI->getVRegDef(CopyVReg);
9277 }
9278
9279 switch (DefMI->getOpcode()) {
9280 default:
9281 return false;
9282 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9283 case AArch64::ANDWri:
9284 case AArch64::ANDXri: {
9285 if (IsTestAndBranch)
9286 return false;
9287 if (DefMI->getParent() != MBB)
9288 return false;
9289 if (!MRI->hasOneNonDBGUse(VReg))
9290 return false;
9291
9292 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9293 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9294 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9295 if (!isPowerOf2_64(Mask))
9296 return false;
9297
9298 MachineOperand &MO = DefMI->getOperand(1);
9299 Register NewReg = MO.getReg();
9300 if (!NewReg.isVirtual())
9301 return false;
9302
9303 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9304
9305 MachineBasicBlock &RefToMBB = *MBB;
9306 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9307 DebugLoc DL = MI.getDebugLoc();
9308 unsigned Imm = Log2_64(Mask);
9309 unsigned Opc = (Imm < 32)
9310 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9311 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9312 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9313 .addReg(NewReg)
9314 .addImm(Imm)
9315 .addMBB(TBB);
9316 // Register lives on to the CBZ now.
9317 MO.setIsKill(false);
9318
9319 // For immediates smaller than 32, we need to use the 32-bit
9320 // variant (W) in all cases, since the 64-bit variant cannot
9321 // encode them.
9322 // Therefore, if the input register is 64-bit, we need to take the
9323 // 32-bit sub-part.
9324 if (!Is32Bit && Imm < 32)
9325 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9326 MI.eraseFromParent();
9327 return true;
9328 }
9329 // Look for CSINC
9330 case AArch64::CSINCWr:
9331 case AArch64::CSINCXr: {
9332 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9333 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9334 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9335 DefMI->getOperand(2).getReg() == AArch64::XZR))
9336 return false;
9337
9338 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9339 true) != -1)
9340 return false;
9341
9342 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9343 // Convert only when the condition code is not modified between
9344 // the CSINC and the branch. The CC may be used by other
9345 // instructions in between.
9346 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo()))
9347 return false;
9348 MachineBasicBlock &RefToMBB = *MBB;
9349 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9350 DebugLoc DL = MI.getDebugLoc();
9351 if (IsNegativeBranch)
9352 CC = AArch64CC::getInvertedCondCode(CC);
9353 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9354 MI.eraseFromParent();
9355 return true;
9356 }
9357 }
9358}
9359
9360std::pair<unsigned, unsigned>
9361AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9362 const unsigned Mask = AArch64II::MO_FRAGMENT;
9363 return std::make_pair(TF & Mask, TF & ~Mask);
9364}
9365
9367AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9368 using namespace AArch64II;
9369
9370 static const std::pair<unsigned, const char *> TargetFlags[] = {
9371 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9372 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9373 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9374 {MO_HI12, "aarch64-hi12"}};
9375 return ArrayRef(TargetFlags);
9376}
9377
9379AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9380 using namespace AArch64II;
9381
9382 static const std::pair<unsigned, const char *> TargetFlags[] = {
9383 {MO_COFFSTUB, "aarch64-coffstub"},
9384 {MO_GOT, "aarch64-got"},
9385 {MO_NC, "aarch64-nc"},
9386 {MO_S, "aarch64-s"},
9387 {MO_TLS, "aarch64-tls"},
9388 {MO_DLLIMPORT, "aarch64-dllimport"},
9389 {MO_PREL, "aarch64-prel"},
9390 {MO_TAGGED, "aarch64-tagged"},
9391 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9392 };
9393 return ArrayRef(TargetFlags);
9394}
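// These flag names are what the MIR printer/parser uses for operand target
// flags. A rough sketch of how they can appear in MIR (syntax from memory,
// so treat it as illustrative only):
//
//   %0:gpr64 = ADRP target-flags(aarch64-page) @var
//   %1:gpr64sp = ADDXri %0, target-flags(aarch64-pageoff, aarch64-nc) @var, 0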
9395
9397AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9398 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9399 {{MOSuppressPair, "aarch64-suppress-pair"},
9400 {MOStridedAccess, "aarch64-strided-access"}};
9401 return ArrayRef(TargetFlags);
9402}
9403
9404/// Constants defining how certain sequences should be outlined.
9405/// This encompasses how an outlined function should be called, and what kind of
9406/// frame should be emitted for that outlined function.
9407///
9408/// \p MachineOutlinerDefault implies that the function should be called with
9409/// a save and restore of LR to the stack.
9410///
9411/// That is,
9412///
9413/// I1 Save LR OUTLINED_FUNCTION:
9414/// I2 --> BL OUTLINED_FUNCTION I1
9415/// I3 Restore LR I2
9416/// I3
9417/// RET
9418///
9419/// * Call construction overhead: 3 (save + BL + restore)
9420/// * Frame construction overhead: 1 (ret)
9421/// * Requires stack fixups? Yes
9422///
9423/// \p MachineOutlinerTailCall implies that the function is being created from
9424/// a sequence of instructions ending in a return.
9425///
9426/// That is,
9427///
9428/// I1 OUTLINED_FUNCTION:
9429/// I2 --> B OUTLINED_FUNCTION I1
9430/// RET I2
9431/// RET
9432///
9433/// * Call construction overhead: 1 (B)
9434/// * Frame construction overhead: 0 (Return included in sequence)
9435/// * Requires stack fixups? No
9436///
9437/// \p MachineOutlinerNoLRSave implies that the function should be called using
9438/// a BL instruction, but doesn't require LR to be saved and restored. This
9439/// happens when LR is known to be dead.
9440///
9441/// That is,
9442///
9443/// I1 OUTLINED_FUNCTION:
9444/// I2 --> BL OUTLINED_FUNCTION I1
9445/// I3 I2
9446/// I3
9447/// RET
9448///
9449/// * Call construction overhead: 1 (BL)
9450/// * Frame construction overhead: 1 (RET)
9451/// * Requires stack fixups? No
9452///
9453/// \p MachineOutlinerThunk implies that the function is being created from
9454/// a sequence of instructions ending in a call. The outlined function is
9455/// called with a BL instruction, and the outlined function tail-calls the
9456/// original call destination.
9457///
9458/// That is,
9459///
9460/// I1 OUTLINED_FUNCTION:
9461/// I2 --> BL OUTLINED_FUNCTION I1
9462/// BL f I2
9463/// B f
9464/// * Call construction overhead: 1 (BL)
9465/// * Frame construction overhead: 0
9466/// * Requires stack fixups? No
9467///
9468/// \p MachineOutlinerRegSave implies that the function should be called with a
9469/// save and restore of LR to an available register. This allows us to avoid
9470/// stack fixups. Note that this outlining variant is compatible with the
9471/// NoLRSave case.
9472///
9473/// That is,
9474///
9475/// I1 Save LR OUTLINED_FUNCTION:
9476/// I2 --> BL OUTLINED_FUNCTION I1
9477/// I3 Restore LR I2
9478/// I3
9479/// RET
9480///
9481/// * Call construction overhead: 3 (save + BL + restore)
9482/// * Frame construction overhead: 1 (ret)
9483/// * Requires stack fixups? No
9484 enum MachineOutlinerClass {
9485 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9486 MachineOutlinerTailCall, /// Only emit a branch.
9487 MachineOutlinerNoLRSave, /// Emit a call and return.
9488 MachineOutlinerThunk, /// Emit a call and tail-call.
9489 MachineOutlinerRegSave /// Same as default, but save to a register.
9490};
9491
9497
9499AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9500 MachineFunction *MF = C.getMF();
9501 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9502 const AArch64RegisterInfo *ARI =
9503 static_cast<const AArch64RegisterInfo *>(&TRI);
9504 // Check if there is an available register across the sequence that we can
9505 // use.
9506 for (unsigned Reg : AArch64::GPR64RegClass) {
9507 if (!ARI->isReservedReg(*MF, Reg) &&
9508 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9509 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9510 Reg != AArch64::X17 && // Ditto for X17.
9511 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9512 C.isAvailableInsideSeq(Reg, TRI))
9513 return Reg;
9514 }
9515 return Register();
9516}
9517
9518static bool
9520 const outliner::Candidate &b) {
9521 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9522 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9523
9524 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9525 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9526}
9527
9528static bool
9530 const outliner::Candidate &b) {
9531 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9532 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9533
9534 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9535}
9536
9538 const outliner::Candidate &b) {
9539 const AArch64Subtarget &SubtargetA =
9540 a.getMF()->getSubtarget<AArch64Subtarget>();
9541 const AArch64Subtarget &SubtargetB =
9542 b.getMF()->getSubtarget<AArch64Subtarget>();
9543 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9544}
9545
9546std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9547AArch64InstrInfo::getOutliningCandidateInfo(
9548 const MachineModuleInfo &MMI,
9549 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9550 unsigned MinRepeats) const {
9551 unsigned SequenceSize = 0;
9552 for (auto &MI : RepeatedSequenceLocs[0])
9553 SequenceSize += getInstSizeInBytes(MI);
9554
9555 unsigned NumBytesToCreateFrame = 0;
9556
9557 // We only allow outlining for functions having exactly matching return
9558 // address signing attributes, i.e., all share the same value for the
9559 // attribute "sign-return-address" and all share the same type of key they
9560 // are signed with.
9561 // Additionally we require all functions to simultaneously either support
9562 // v8.3a features or not. Otherwise an outlined function could get signed
9563 // using dedicated v8.3 instructions and a call from a function that doesn't
9564 // support v8.3 instructions would therefore be invalid.
9565 if (std::adjacent_find(
9566 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9567 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9568 // Return true if a and b are non-equal w.r.t. return address
9569 // signing or support of v8.3a features
9570 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9571 outliningCandidatesSigningKeyConsensus(a, b) &&
9572 outliningCandidatesV8_3OpsConsensus(a, b)) {
9573 return false;
9574 }
9575 return true;
9576 }) != RepeatedSequenceLocs.end()) {
9577 return std::nullopt;
9578 }
9579
9580 // Since at this point all candidates agree on their return address signing
9581 // picking just one is fine. If the candidate functions potentially sign their
9582 // return addresses, the outlined function should do the same. Note that in
9583 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9584 // not certainly true that the outlined function will have to sign its return
9585 // address but this decision is made later, when the decision to outline
9586 // has already been made.
9587 // The same holds for the number of additional instructions we need: On
9588 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9589 // necessary. However, at this point we don't know if the outlined function
9590 // will have a RET instruction so we assume the worst.
9591 const TargetRegisterInfo &TRI = getRegisterInfo();
9592 // Performing a tail call may require extra checks when PAuth is enabled.
9593 // If PAuth is disabled, set it to zero for uniformity.
9594 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9595 if (RepeatedSequenceLocs[0]
9596 .getMF()
9597 ->getInfo<AArch64FunctionInfo>()
9598 ->shouldSignReturnAddress(true)) {
9599 // One PAC and one AUT instructions
9600 NumBytesToCreateFrame += 8;
9601
9602 // PAuth is enabled - set extra tail call cost, if any.
9603 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9604 *RepeatedSequenceLocs[0].getMF());
9605 NumBytesToCheckLRInTCEpilogue =
9607 // Checking the authenticated LR value may significantly impact
9608 // SequenceSize, so account for it for more precise results.
9609 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9610 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9611
9612 // We have to check if sp modifying instructions would get outlined.
9613 // If so we only allow outlining if sp is unchanged overall, so matching
9614 // sub and add instructions are okay to outline, all other sp modifications
9615 // are not
9616 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9617 int SPValue = 0;
9618 for (auto &MI : C) {
9619 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9620 switch (MI.getOpcode()) {
9621 case AArch64::ADDXri:
9622 case AArch64::ADDWri:
9623 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9624 assert(MI.getOperand(2).isImm() &&
9625 "Expected operand to be immediate");
9626 assert(MI.getOperand(1).isReg() &&
9627 "Expected operand to be a register");
9628 // Check if the add just increments sp. If so, we search for
9629 // matching sub instructions that decrement sp. If not, the
9630 // modification is illegal
9631 if (MI.getOperand(1).getReg() == AArch64::SP)
9632 SPValue += MI.getOperand(2).getImm();
9633 else
9634 return true;
9635 break;
9636 case AArch64::SUBXri:
9637 case AArch64::SUBWri:
9638 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9639 assert(MI.getOperand(2).isImm() &&
9640 "Expected operand to be immediate");
9641 assert(MI.getOperand(1).isReg() &&
9642 "Expected operand to be a register");
9643 // Check if the sub just decrements sp. If so, we search for
9644 // matching add instructions that increment sp. If not, the
9645 // modification is illegal
9646 if (MI.getOperand(1).getReg() == AArch64::SP)
9647 SPValue -= MI.getOperand(2).getImm();
9648 else
9649 return true;
9650 break;
9651 default:
9652 return true;
9653 }
9654 }
9655 }
9656 if (SPValue)
9657 return true;
9658 return false;
9659 };
9660 // Remove candidates with illegal stack modifying instructions
9661 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9662
9663 // If the sequence doesn't have enough candidates left, then we're done.
9664 if (RepeatedSequenceLocs.size() < MinRepeats)
9665 return std::nullopt;
9666 }
9667
9668 // Properties about candidate MBBs that hold for all of them.
9669 unsigned FlagsSetInAll = 0xF;
9670
9671 // Compute liveness information for each candidate, and set FlagsSetInAll.
9672 for (outliner::Candidate &C : RepeatedSequenceLocs)
9673 FlagsSetInAll &= C.Flags;
9674
9675 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9676
9677 // Helper lambda which sets call information for every candidate.
9678 auto SetCandidateCallInfo =
9679 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9680 for (outliner::Candidate &C : RepeatedSequenceLocs)
9681 C.setCallInfo(CallID, NumBytesForCall);
9682 };
9683
9684 unsigned FrameID = MachineOutlinerDefault;
9685 NumBytesToCreateFrame += 4;
9686
9687 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9688 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9689 });
9690
9691 // We check to see if CFI Instructions are present, and if they are
9692 // we find the number of CFI Instructions in the candidates.
9693 unsigned CFICount = 0;
9694 for (auto &I : RepeatedSequenceLocs[0]) {
9695 if (I.isCFIInstruction())
9696 CFICount++;
9697 }
9698
9699 // We compare the number of found CFI Instructions to the number of CFI
9700 // instructions in the parent function for each candidate. We must check this
9701 // since if we outline one of the CFI instructions in a function, we have to
9702 // outline them all for correctness. If we do not, the address offsets will be
9703 // incorrect between the two sections of the program.
9704 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9705 std::vector<MCCFIInstruction> CFIInstructions =
9706 C.getMF()->getFrameInstructions();
9707
9708 if (CFICount > 0 && CFICount != CFIInstructions.size())
9709 return std::nullopt;
9710 }
9711
9712 // Returns true if an instruction is safe to fix up, false otherwise.
9713 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9714 if (MI.isCall())
9715 return true;
9716
9717 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9718 !MI.readsRegister(AArch64::SP, &TRI))
9719 return true;
9720
9721 // Any modification of SP will break our code to save/restore LR.
9722 // FIXME: We could handle some instructions which add a constant
9723 // offset to SP, with a bit more work.
9724 if (MI.modifiesRegister(AArch64::SP, &TRI))
9725 return false;
9726
9727 // At this point, we have a stack instruction that we might need to
9728 // fix up. We'll handle it if it's a load or store.
9729 if (MI.mayLoadOrStore()) {
9730 const MachineOperand *Base; // Filled with the base operand of MI.
9731 int64_t Offset; // Filled with the offset of MI.
9732 bool OffsetIsScalable;
9733
9734 // Does it allow us to offset the base operand and is the base the
9735 // register SP?
9736 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9737 !Base->isReg() || Base->getReg() != AArch64::SP)
9738 return false;
9739
9740 // Fix-up code below assumes byte offsets.
9741 if (OffsetIsScalable)
9742 return false;
9743
9744 // Find the minimum/maximum offset for this instruction and check
9745 // if fixing it up would be in range.
9746 int64_t MinOffset,
9747 MaxOffset; // Unscaled offsets for the instruction.
9748 // The scale to multiply the offsets by.
9749 TypeSize Scale(0U, false), DummyWidth(0U, false);
9750 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9751
9752 Offset += 16; // Update the offset to what it would be if we outlined.
9753 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9754 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9755 return false;
9756
9757 // It's in range, so we can outline it.
9758 return true;
9759 }
9760
9761 // FIXME: Add handling for instructions like "add x0, sp, #8".
9762
9763 // We can't fix it up, so don't outline it.
9764 return false;
9765 };
9766
9767 // True if it's possible to fix up each stack instruction in this sequence.
9768 // Important for frames/call variants that modify the stack.
9769 bool AllStackInstrsSafe =
9770 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9771
9772 // If the last instruction in any candidate is a terminator, then we should
9773 // tail call all of the candidates.
9774 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9775 FrameID = MachineOutlinerTailCall;
9776 NumBytesToCreateFrame = 0;
9777 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9778 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9779 }
9780
9781 else if (LastInstrOpcode == AArch64::BL ||
9782 ((LastInstrOpcode == AArch64::BLR ||
9783 LastInstrOpcode == AArch64::BLRNoIP) &&
9784 !HasBTI)) {
9785 // FIXME: Do we need to check if the code after this uses the value of LR?
9786 FrameID = MachineOutlinerThunk;
9787 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9788 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9789 }
9790
9791 else {
9792 // We need to decide how to emit calls + frames. We can always emit the same
9793 // frame if we don't need to save to the stack. If we have to save to the
9794 // stack, then we need a different frame.
9795 unsigned NumBytesNoStackCalls = 0;
9796 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9797
9798 // Check if we have to save LR.
9799 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9800 bool LRAvailable =
9802 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9803 : true;
9804 // If we have a noreturn caller, then we're going to be conservative and
9805 // say that we have to save LR. If we don't have a ret at the end of the
9806 // block, then we can't reason about liveness accurately.
9807 //
9808 // FIXME: We can probably do better than always disabling this in
9809 // noreturn functions by fixing up the liveness info.
9810 bool IsNoReturn =
9811 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9812
9813 // Is LR available? If so, we don't need a save.
9814 if (LRAvailable && !IsNoReturn) {
9815 NumBytesNoStackCalls += 4;
9816 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9817 CandidatesWithoutStackFixups.push_back(C);
9818 }
9819
9820 // Is an unused register available? If so, we won't modify the stack, so
9821 // we can outline with the same frame type as those that don't save LR.
9822 else if (findRegisterToSaveLRTo(C)) {
9823 NumBytesNoStackCalls += 12;
9824 C.setCallInfo(MachineOutlinerRegSave, 12);
9825 CandidatesWithoutStackFixups.push_back(C);
9826 }
9827
9828 // Is SP used in the sequence at all? If not, we don't have to modify
9829 // the stack, so we are guaranteed to get the same frame.
9830 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9831 NumBytesNoStackCalls += 12;
9832 C.setCallInfo(MachineOutlinerDefault, 12);
9833 CandidatesWithoutStackFixups.push_back(C);
9834 }
9835
9836 // If we outline this, we need to modify the stack. Pretend we don't
9837 // outline this by saving all of its bytes.
9838 else {
9839 NumBytesNoStackCalls += SequenceSize;
9840 }
9841 }
9842
9843 // If there are no places where we have to save LR, then note that we
9844 // don't have to update the stack. Otherwise, give every candidate the
9845 // default call type, as long as it's safe to do so.
9846 if (!AllStackInstrsSafe ||
9847 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9848 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9849 FrameID = MachineOutlinerNoLRSave;
9850 if (RepeatedSequenceLocs.size() < MinRepeats)
9851 return std::nullopt;
9852 } else {
9853 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9854
9855 // Bugzilla ID: 46767
9856 // TODO: Check if fixing up the stack more than once is safe so we can
9857 // outline these.
9858 //
9859 // An outline resulting in a caller that requires stack fixups at the
9860 // callsite to a callee that also requires stack fixups can happen when
9861 // there are no available registers at the candidate callsite for a
9862 // candidate that itself also has calls.
9863 //
9864 // In other words if function_containing_sequence in the following pseudo
9865 // assembly requires that we save LR at the point of the call, but there
9866 // are no available registers: in this case we save using SP and as a
9867 // result the SP offsets require stack fixups by multiples of 16.
9868 //
9869 // function_containing_sequence:
9870 // ...
9871 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9872 // call OUTLINED_FUNCTION_N
9873 // restore LR from SP
9874 // ...
9875 //
9876 // OUTLINED_FUNCTION_N:
9877 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9878 // ...
9879 // bl foo
9880 // restore LR from SP
9881 // ret
9882 //
9883 // Because the code to handle more than one stack fixup does not
9884 // currently have the proper checks for legality, these cases will assert
9885 // in the AArch64 MachineOutliner. This is because the code to do this
9886 // needs more hardening, testing, better checks that generated code is
9887 // legal, etc and because it is only verified to handle a single pass of
9888 // stack fixup.
9889 //
9890 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9891 // these cases until they are known to be handled. Bugzilla 46767 is
9892 // referenced in comments at the assert site.
9893 //
9894 // To avoid asserting (or generating non-legal code on noassert builds)
9895 // we remove all candidates which would need more than one stack fixup by
9896 // pruning the cases where the candidate has calls while also having no
9897 // available LR and having no available general purpose registers to copy
9898 // LR to (ie one extra stack save/restore).
9899 //
9900 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9901 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9902 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9903 return (llvm::any_of(C, IsCall)) &&
9904 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9905 !findRegisterToSaveLRTo(C));
9906 });
9907 }
9908 }
9909
9910 // If we dropped all of the candidates, bail out here.
9911 if (RepeatedSequenceLocs.size() < MinRepeats)
9912 return std::nullopt;
9913 }
9914
9915 // Does every candidate's MBB contain a call? If so, then we might have a call
9916 // in the range.
9917 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9918 // Check if the range contains a call. These require a save + restore of the
9919 // link register.
9920 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9921 bool ModStackToSaveLR = false;
9922 if (any_of(drop_end(FirstCand),
9923 [](const MachineInstr &MI) { return MI.isCall(); }))
9924 ModStackToSaveLR = true;
9925
9926 // Handle the last instruction separately. If this is a tail call, then the
9927 // last instruction is a call. We don't want to save + restore in this case.
9928 // However, it could be possible that the last instruction is a call without
9929 // it being valid to tail call this sequence. We should consider this as
9930 // well.
9931 else if (FrameID != MachineOutlinerThunk &&
9932 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9933 ModStackToSaveLR = true;
9934
9935 if (ModStackToSaveLR) {
9936 // We can't fix up the stack. Bail out.
9937 if (!AllStackInstrsSafe)
9938 return std::nullopt;
9939
9940 // Save + restore LR.
9941 NumBytesToCreateFrame += 8;
9942 }
9943 }
9944
9945 // If we have CFI instructions, we can only outline if the outlined section
9946 // can be a tail call
9947 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9948 return std::nullopt;
9949
9950 return std::make_unique<outliner::OutlinedFunction>(
9951 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9952}
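// A worked example of the cost comparison above (sizes are illustrative):
// with three candidates where one has LR free (4 bytes of call overhead),
// one has a spare register (12 bytes) and one would need stack fixups for a
// 20-byte sequence, NumBytesNoStackCalls is 4 + 12 + 20 = 36 <= 3 * 12, so
// the fixup-needing candidate is dropped and no stack fixups are emitted.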
9953
9954void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9955 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9956 // If a bunch of candidates reach this point they must agree on their return
9957 // address signing. It is therefore enough to just consider the signing
9958 // behaviour of one of them
9959 const auto &CFn = Candidates.front().getMF()->getFunction();
9960
9961 if (CFn.hasFnAttribute("ptrauth-returns"))
9962 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9963 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9964 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9965 // Since all candidates belong to the same module, just copy the
9966 // function-level attributes of an arbitrary function.
9967 if (CFn.hasFnAttribute("sign-return-address"))
9968 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9969 if (CFn.hasFnAttribute("sign-return-address-key"))
9970 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9971
9972 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9973}
9974
9975bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9976 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9977 const Function &F = MF.getFunction();
9978
9979 // Can F be deduplicated by the linker? If it can, don't outline from it.
9980 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9981 return false;
9982
9983 // Don't outline from functions with section markings; the program could
9984 // expect that all the code is in the named section.
9985 // FIXME: Allow outlining from multiple functions with the same section
9986 // marking.
9987 if (F.hasSection())
9988 return false;
9989
9990 // Outlining from functions with redzones is unsafe since the outliner may
9991 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9992 // outline from it.
9993 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9994 if (!AFI || AFI->hasRedZone().value_or(true))
9995 return false;
9996
9997 // FIXME: Determine whether it is safe to outline from functions which contain
9998 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9999 // outlined together and ensure it is safe to outline with async unwind info,
10000 // required for saving & restoring VG around calls.
10001 if (AFI->hasStreamingModeChanges())
10002 return false;
10003
10004 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10006 return false;
10007
10008 // It's safe to outline from MF.
10009 return true;
10010}
10011
10013AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10014 unsigned &Flags) const {
10016 "Must track liveness!");
10018 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10019 Ranges;
10020 // According to the AArch64 Procedure Call Standard, the following are
10021 // undefined on entry/exit from a function call:
10022 //
10023 // * Registers x16, x17, (and thus w16, w17)
10024 // * Condition codes (and thus the NZCV register)
10025 //
10026 // If any of these registers are used inside or live across an outlined
10027 // function, then they may be modified later, either by the compiler or
10028 // some other tool (like the linker).
10029 //
10030 // To avoid outlining in these situations, partition each block into ranges
10031 // where these registers are dead. We will only outline from those ranges.
10032 LiveRegUnits LRU(getRegisterInfo());
10033 auto AreAllUnsafeRegsDead = [&LRU]() {
10034 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10035 LRU.available(AArch64::NZCV);
10036 };
10037
10038 // We need to know if LR is live across an outlining boundary later on in
10039 // order to decide how we'll create the outlined call, frame, etc.
10040 //
10041 // It's pretty expensive to check this for *every candidate* within a block.
10042 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10043 // to compute liveness from the end of the block for O(n) candidates within
10044 // the block.
10045 //
10046 // So, to improve the average case, let's keep track of liveness from the end
10047 // of the block to the beginning of *every outlinable range*. If we know that
10048 // LR is available in every range we could outline from, then we know that
10049 // we don't need to check liveness for any candidate within that range.
10050 bool LRAvailableEverywhere = true;
10051 // Compute liveness bottom-up.
10052 LRU.addLiveOuts(MBB);
10053 // Update flags that require info about the entire MBB.
10054 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10055 if (MI.isCall() && !MI.isTerminator())
10057 };
10058 // Range: [RangeBegin, RangeEnd)
10059 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10060 unsigned RangeLen;
10061 auto CreateNewRangeStartingAt =
10062 [&RangeBegin, &RangeEnd,
10063 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10064 RangeBegin = NewBegin;
10065 RangeEnd = std::next(RangeBegin);
10066 RangeLen = 0;
10067 };
10068 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10069 // At least one unsafe register is not dead. We do not want to outline at
10070 // this point. If it is long enough to outline from and does not cross a
10071 // bundle boundary, save the range [RangeBegin, RangeEnd).
10072 if (RangeLen <= 1)
10073 return;
10074 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10075 return;
10076 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10077 return;
10078 Ranges.emplace_back(RangeBegin, RangeEnd);
10079 };
10080 // Find the first point where all unsafe registers are dead.
10081 // FIND: <safe instr> <-- end of first potential range
10082 // SKIP: <unsafe def>
10083 // SKIP: ... everything between ...
10084 // SKIP: <unsafe use>
10085 auto FirstPossibleEndPt = MBB.instr_rbegin();
10086 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10087 LRU.stepBackward(*FirstPossibleEndPt);
10088 // Update flags that impact how we outline across the entire block,
10089 // regardless of safety.
10090 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10091 if (AreAllUnsafeRegsDead())
10092 break;
10093 }
10094 // If we exhausted the entire block, we have no safe ranges to outline.
10095 if (FirstPossibleEndPt == MBB.instr_rend())
10096 return Ranges;
10097 // Current range.
10098 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10099 // StartPt points to the first place where all unsafe registers
10100 // are dead (if there is any such point). Begin partitioning the MBB into
10101 // ranges.
10102 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10103 LRU.stepBackward(MI);
10104 UpdateWholeMBBFlags(MI);
10105 if (!AreAllUnsafeRegsDead()) {
10106 SaveRangeIfNonEmpty();
10107 CreateNewRangeStartingAt(MI.getIterator());
10108 continue;
10109 }
10110 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10111 RangeBegin = MI.getIterator();
10112 ++RangeLen;
10113 }
10114 // Above loop misses the last (or only) range. If we are still safe, then
10115 // let's save the range.
10116 if (AreAllUnsafeRegsDead())
10117 SaveRangeIfNonEmpty();
10118 if (Ranges.empty())
10119 return Ranges;
10120 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
10121 // the order.
10122 std::reverse(Ranges.begin(), Ranges.end());
10123 // If there is at least one outlinable range where LR is unavailable
10124 // somewhere, remember that.
10125 if (!LRAvailableEverywhere)
10127 return Ranges;
10128}
10129
10131AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10133 unsigned Flags) const {
10134 MachineInstr &MI = *MIT;
10135
10136 // Don't outline anything used for return address signing. The outlined
10137 // function will get signed later if needed
10138 switch (MI.getOpcode()) {
10139 case AArch64::PACM:
10140 case AArch64::PACIASP:
10141 case AArch64::PACIBSP:
10142 case AArch64::PACIASPPC:
10143 case AArch64::PACIBSPPC:
10144 case AArch64::AUTIASP:
10145 case AArch64::AUTIBSP:
10146 case AArch64::AUTIASPPCi:
10147 case AArch64::AUTIASPPCr:
10148 case AArch64::AUTIBSPPCi:
10149 case AArch64::AUTIBSPPCr:
10150 case AArch64::RETAA:
10151 case AArch64::RETAB:
10152 case AArch64::RETAASPPCi:
10153 case AArch64::RETAASPPCr:
10154 case AArch64::RETABSPPCi:
10155 case AArch64::RETABSPPCr:
10156 case AArch64::EMITBKEY:
10157 case AArch64::PAUTH_PROLOGUE:
10158 case AArch64::PAUTH_EPILOGUE:
10159 return outliner::InstrType::Illegal;
10160 }
10161
10162 // We can only outline these if we will tail call the outlined function, or
10163 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10164 // in a tail call.
10165 //
10166 // FIXME: If the proper fixups for the offset are implemented, this should be
10167 // possible.
10168 if (MI.isCFIInstruction())
10169 return outliner::InstrType::Legal;
10170
10171 // Is this a terminator for a basic block?
10172 if (MI.isTerminator())
10173 // TargetInstrInfo::getOutliningType has already filtered out anything
10174 // that would break this, so we can allow it here.
10175 return outliner::InstrType::Legal;
10176
10177 // Make sure none of the operands are un-outlinable.
10178 for (const MachineOperand &MOP : MI.operands()) {
10179 // A check preventing CFI indices was here before, but only CFI
10180 // instructions should have those.
10181 assert(!MOP.isCFIIndex());
10182
10183 // If it uses LR or W30 explicitly, then don't touch it.
10184 if (MOP.isReg() && !MOP.isImplicit() &&
10185 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10186 return outliner::InstrType::Illegal;
10187 }
10188
10189 // Special cases for instructions that can always be outlined, but will fail
10190 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
10191 // be outlined because they don't require a *specific* value to be in LR.
10192 if (MI.getOpcode() == AArch64::ADRP)
10193 return outliner::InstrType::Legal;
10194
10195 // If MI is a call we might be able to outline it. We don't want to outline
10196 // any calls that rely on the position of items on the stack. When we outline
10197 // something containing a call, we have to emit a save and restore of LR in
10198 // the outlined function. Currently, this always happens by saving LR to the
10199 // stack. Thus, if we outline, say, half the parameters for a function call
10200 // plus the call, then we'll break the callee's expectations for the layout
10201 // of the stack.
10202 //
10203 // FIXME: Allow calls to functions which construct a stack frame, as long
10204 // as they don't access arguments on the stack.
10205 // FIXME: Figure out some way to analyze functions defined in other modules.
10206 // We should be able to compute the memory usage based on the IR calling
10207 // convention, even if we can't see the definition.
10208 if (MI.isCall()) {
10209 // Get the function associated with the call. Look at each operand and find
10210 // the one that represents the callee and get its name.
10211 const Function *Callee = nullptr;
10212 for (const MachineOperand &MOP : MI.operands()) {
10213 if (MOP.isGlobal()) {
10214 Callee = dyn_cast<Function>(MOP.getGlobal());
10215 break;
10216 }
10217 }
10218
10219 // Never outline calls to mcount. There isn't any rule that would require
10220 // this, but the Linux kernel's "ftrace" feature depends on it.
10221 if (Callee && Callee->getName() == "\01_mcount")
10222 return outliner::InstrType::Illegal;
10223
10224 // If we don't know anything about the callee, assume it depends on the
10225 // stack layout of the caller. In that case, it's only legal to outline
10226 // as a tail-call. Explicitly list the call instructions we know about so we
10227 // don't get unexpected results with call pseudo-instructions.
10228 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10229 if (MI.getOpcode() == AArch64::BLR ||
10230 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10231 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10232
10233 if (!Callee)
10234 return UnknownCallOutlineType;
10235
10236 // We have a function we have information about. Check if it's something we
10237 // can safely outline.
10238 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10239
10240 // We don't know what's going on with the callee at all. Don't touch it.
10241 if (!CalleeMF)
10242 return UnknownCallOutlineType;
10243
10244 // Check if we know anything about the callee saves on the function. If we
10245 // don't, then don't touch it, since that implies that we haven't
10246 // computed anything about its stack frame yet.
10247 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10248 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10249 MFI.getNumObjects() > 0)
10250 return UnknownCallOutlineType;
10251
10252 // At this point, we can say that CalleeMF ought to not pass anything on the
10253 // stack. Therefore, we can outline it.
10254 return outliner::InstrType::Legal;
10255 }
10256
10257 // Don't touch the link register or W30.
10258 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10259 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10260 return outliner::InstrType::Illegal;
10261
10262 // Don't outline BTI instructions, because that will prevent the outlining
10263 // site from being indirectly callable.
10264 if (hasBTISemantics(MI))
10265 return outliner::InstrType::Illegal;
10266
10267 return outliner::InstrType::Legal;
10268}
10269
10270void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10271 for (MachineInstr &MI : MBB) {
10272 const MachineOperand *Base;
10273 TypeSize Width(0, false);
10274 int64_t Offset;
10275 bool OffsetIsScalable;
10276
10277 // Is this a load or store with an immediate offset with SP as the base?
10278 if (!MI.mayLoadOrStore() ||
10279 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10280 &RI) ||
10281 (Base->isReg() && Base->getReg() != AArch64::SP))
10282 continue;
10283
10284 // It is, so we have to fix it up.
10285 TypeSize Scale(0U, false);
10286 int64_t Dummy1, Dummy2;
10287
10288 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10289 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10290 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10291 assert(Scale != 0 && "Unexpected opcode!");
10292 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10293
10294 // We've pushed the return address to the stack, so add 16 to the offset.
10295 // This is safe, since we already checked if it would overflow when we
10296 // checked if this instruction was legal to outline.
10297 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10298 StackOffsetOperand.setImm(NewImm);
10299 }
10300}
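// As a small example (illustrative only): if the outlined body contained
//
//   ldr x0, [sp, #8]
//
// then after the outlined function's "str x30, [sp, #-16]!" prologue the same
// slot sits 16 bytes further from SP, so this fixup rewrites the access to
//
//   ldr x0, [sp, #24]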
10301
10303 const AArch64InstrInfo *TII,
10304 bool ShouldSignReturnAddr) {
10305 if (!ShouldSignReturnAddr)
10306 return;
10307
10308 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10310 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10311 TII->get(AArch64::PAUTH_EPILOGUE))
10313}
10314
10315void AArch64InstrInfo::buildOutlinedFrame(
10317 const outliner::OutlinedFunction &OF) const {
10318
10319 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10320
10321 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10322 FI->setOutliningStyle("Tail Call");
10323 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10324 // For thunk outlining, rewrite the last instruction from a call to a
10325 // tail-call.
10326 MachineInstr *Call = &*--MBB.instr_end();
10327 unsigned TailOpcode;
10328 if (Call->getOpcode() == AArch64::BL) {
10329 TailOpcode = AArch64::TCRETURNdi;
10330 } else {
10331 assert(Call->getOpcode() == AArch64::BLR ||
10332 Call->getOpcode() == AArch64::BLRNoIP);
10333 TailOpcode = AArch64::TCRETURNriALL;
10334 }
10335 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10336 .add(Call->getOperand(0))
10337 .addImm(0);
10338 MBB.insert(MBB.end(), TC);
10340
10341 FI->setOutliningStyle("Thunk");
10342 }
10343
10344 bool IsLeafFunction = true;
10345
10346 // Is there a call in the outlined range?
10347 auto IsNonTailCall = [](const MachineInstr &MI) {
10348 return MI.isCall() && !MI.isReturn();
10349 };
10350
10351 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10352 // Fix up the instructions in the range, since we're going to modify the
10353 // stack.
10354
10355 // Bugzilla ID: 46767
10356 // TODO: Check if fixing up twice is safe so we can outline these.
10357 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10358 "Can only fix up stack references once");
10359 fixupPostOutline(MBB);
10360
10361 IsLeafFunction = false;
10362
10363 // LR has to be a live in so that we can save it.
10364 if (!MBB.isLiveIn(AArch64::LR))
10365 MBB.addLiveIn(AArch64::LR);
10366
10369
10370 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10371 OF.FrameConstructionID == MachineOutlinerThunk)
10372 Et = std::prev(MBB.end());
10373
10374 // Insert a save before the outlined region
10375 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10376 .addReg(AArch64::SP, RegState::Define)
10377 .addReg(AArch64::LR)
10378 .addReg(AArch64::SP)
10379 .addImm(-16);
10380 It = MBB.insert(It, STRXpre);
10381
10382 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10383 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10384
10385 // Add a CFI saying the stack was moved 16 B down.
10386 CFIBuilder.buildDefCFAOffset(16);
10387
10388 // Add a CFI saying that the LR that we want to find is now 16 B higher
10389 // than before.
10390 CFIBuilder.buildOffset(AArch64::LR, -16);
10391 }
10392
10393 // Insert a restore before the terminator for the function.
10394 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10395 .addReg(AArch64::SP, RegState::Define)
10396 .addReg(AArch64::LR, RegState::Define)
10397 .addReg(AArch64::SP)
10398 .addImm(16);
10399 Et = MBB.insert(Et, LDRXpost);
10400 }
10401
10402 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10403
10404 // If this is a tail call outlined function, then there's already a return.
10405 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10406 OF.FrameConstructionID == MachineOutlinerThunk) {
10407 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10408 return;
10409 }
10410
10411 // It's not a tail call, so we have to insert the return ourselves.
10412
10413 // LR has to be a live in so that we can return to it.
10414 if (!MBB.isLiveIn(AArch64::LR))
10415 MBB.addLiveIn(AArch64::LR);
10416
10417 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10418 .addReg(AArch64::LR);
10419 MBB.insert(MBB.end(), ret);
10420
10421 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10422
10423 FI->setOutliningStyle("Function");
10424
10425 // Did we have to modify the stack by saving the link register?
10426 if (OF.FrameConstructionID != MachineOutlinerDefault)
10427 return;
10428
10429 // We modified the stack.
10430 // Walk over the basic block and fix up all the stack accesses.
10431 fixupPostOutline(MBB);
10432}
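// A rough sketch of the frame built above when the outlined body contains a
// non-tail call, so LR has to be preserved inside the outlined function
// (instruction order is illustrative):
//
//   OUTLINED_FUNCTION:
//     str x30, [sp, #-16]!     ; STRXpre inserted at the start
//     ...                      ; body, SP offsets bumped by fixupPostOutline
//     bl  callee
//     ldr x30, [sp], #16       ; LDRXpost inserted before the terminator
//     ret                      ; appended unless this is a tail-call frame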
10433
10434MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10437
10438 // Are we tail calling?
10439 if (C.CallConstructionID == MachineOutlinerTailCall) {
10440 // If yes, then we can just branch to the label.
10441 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10442 .addGlobalAddress(M.getNamedValue(MF.getName()))
10443 .addImm(0));
10444 return It;
10445 }
10446
10447 // Are we saving the link register?
10448 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10449 C.CallConstructionID == MachineOutlinerThunk) {
10450 // No, so just insert the call.
10451 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10452 .addGlobalAddress(M.getNamedValue(MF.getName())));
10453 return It;
10454 }
10455
10456 // We want to return the spot where we inserted the call.
10457 MachineBasicBlock::iterator CallPt;
10458
10459 // Instructions for saving and restoring LR around the call instruction we're
10460 // going to insert.
10461 MachineInstr *Save;
10462 MachineInstr *Restore;
10463 // Can we save to a register?
10464 if (C.CallConstructionID == MachineOutlinerRegSave) {
10465 // FIXME: This logic should be sunk into a target-specific interface so that
10466 // we don't have to recompute the register.
10467 Register Reg = findRegisterToSaveLRTo(C);
10468 assert(Reg && "No callee-saved register available?");
10469
10470 // LR has to be a live in so that we can save it.
10471 if (!MBB.isLiveIn(AArch64::LR))
10472 MBB.addLiveIn(AArch64::LR);
10473
10474 // Save and restore LR from Reg.
10475 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10476 .addReg(AArch64::XZR)
10477 .addReg(AArch64::LR)
10478 .addImm(0);
10479 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10480 .addReg(AArch64::XZR)
10481 .addReg(Reg)
10482 .addImm(0);
10483 } else {
10484 // We have the default case. Save and restore from SP.
10485 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10486 .addReg(AArch64::SP, RegState::Define)
10487 .addReg(AArch64::LR)
10488 .addReg(AArch64::SP)
10489 .addImm(-16);
10490 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10491 .addReg(AArch64::SP, RegState::Define)
10492 .addReg(AArch64::LR, RegState::Define)
10493 .addReg(AArch64::SP)
10494 .addImm(16);
10495 }
10496
10497 It = MBB.insert(It, Save);
10498 It++;
10499
10500 // Insert the call.
10501 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10502 .addGlobalAddress(M.getNamedValue(MF.getName())));
10503 CallPt = It;
10504 It++;
10505
10506 It = MBB.insert(It, Restore);
10507 return CallPt;
10508}
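// As a concrete illustration of the MachineOutlinerRegSave call sequence
// built above (x20 stands in for whatever register findRegisterToSaveLRTo
// happens to pick):
//
//   mov x20, x30             ; ORRXrs x20, xzr, x30
//   bl  OUTLINED_FUNCTION
//   mov x30, x20             ; ORRXrs x30, xzr, x20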
10509
10510bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10511 MachineFunction &MF) const {
10512 return MF.getFunction().hasMinSize();
10513}
10514
10515void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10516 MachineBasicBlock::iterator Iter,
10517 DebugLoc &DL,
10518 bool AllowSideEffects) const {
10519 const MachineFunction &MF = *MBB.getParent();
10520 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10521 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10522
10523 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10524 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10525 } else if (STI.isSVEorStreamingSVEAvailable()) {
10526 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10527 .addImm(0)
10528 .addImm(0);
10529 } else if (STI.isNeonAvailable()) {
10530 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10531 .addImm(0);
10532 } else {
10533 // This is a streaming-compatible function without SVE. We don't have full
10534 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10535 // So, since `movi v..` would be illegal, use `fmov d..` instead.
10536 assert(STI.hasNEON() && "Expected to have NEON.");
10537 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10538 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10539 }
10540}
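// Rough summary of the selection above (used, e.g., when call-used registers
// are zeroed): a GPR is cleared with MOVZXi #0, an SVE/streaming-SVE vector
// with DUP_ZI_D #0, a NEON vector with MOVIv2d_ns #0, and a
// streaming-compatible FPR-only target clears just the 64-bit D sub-register
// via FMOVD0.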
10541
10542std::optional<DestSourcePair>
10543AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10544
10545 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10546 // and a zero immediate operand are used as aliases for the mov instruction.
10547 if (((MI.getOpcode() == AArch64::ORRWrs &&
10548 MI.getOperand(1).getReg() == AArch64::WZR &&
10549 MI.getOperand(3).getImm() == 0x0) ||
10550 (MI.getOpcode() == AArch64::ORRWrr &&
10551 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10552 // Check that the w->w move is not a zero-extending w->x mov.
10553 (!MI.getOperand(0).getReg().isVirtual() ||
10554 MI.getOperand(0).getSubReg() == 0) &&
10555 (!MI.getOperand(0).getReg().isPhysical() ||
10556 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10557 /*TRI=*/nullptr) == -1))
10558 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10559
10560 if (MI.getOpcode() == AArch64::ORRXrs &&
10561 MI.getOperand(1).getReg() == AArch64::XZR &&
10562 MI.getOperand(3).getImm() == 0x0)
10563 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10564
10565 return std::nullopt;
10566}
10567
10568std::optional<DestSourcePair>
10569AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10570 if ((MI.getOpcode() == AArch64::ORRWrs &&
10571 MI.getOperand(1).getReg() == AArch64::WZR &&
10572 MI.getOperand(3).getImm() == 0x0) ||
10573 (MI.getOpcode() == AArch64::ORRWrr &&
10574 MI.getOperand(1).getReg() == AArch64::WZR))
10575 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10576 return std::nullopt;
10577}
10578
10579std::optional<RegImmPair>
10580AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10581 int Sign = 1;
10582 int64_t Offset = 0;
10583
10584 // TODO: Handle cases where Reg is a super- or sub-register of the
10585 // destination register.
10586 const MachineOperand &Op0 = MI.getOperand(0);
10587 if (!Op0.isReg() || Reg != Op0.getReg())
10588 return std::nullopt;
10589
10590 switch (MI.getOpcode()) {
10591 default:
10592 return std::nullopt;
10593 case AArch64::SUBWri:
10594 case AArch64::SUBXri:
10595 case AArch64::SUBSWri:
10596 case AArch64::SUBSXri:
10597 Sign *= -1;
10598 [[fallthrough]];
10599 case AArch64::ADDSWri:
10600 case AArch64::ADDSXri:
10601 case AArch64::ADDWri:
10602 case AArch64::ADDXri: {
10603 // TODO: Third operand can be global address (usually some string).
10604 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10605 !MI.getOperand(2).isImm())
10606 return std::nullopt;
10607 int Shift = MI.getOperand(3).getImm();
10608 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10609 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10610 }
10611 }
10612 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10613}
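// Worked examples for the logic above (illustrative register choices):
//   ADDXri x0, x1, #4,  lsl #0   -> RegImmPair{x1, +4}
//   ADDXri x0, x1, #16, lsl #12  -> RegImmPair{x1, +65536}  (16 << 12)
//   SUBXri x0, x1, #32, lsl #0   -> RegImmPair{x1, -32}
// Asking about any register other than the destination returns std::nullopt.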
10614
10615/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10616/// the destination register then, if possible, describe the value in terms of
10617/// the source register.
10618static std::optional<ParamLoadedValue>
10619describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10620 const TargetInstrInfo *TII,
10621 const TargetRegisterInfo *TRI) {
10622 auto DestSrc = TII->isCopyLikeInstr(MI);
10623 if (!DestSrc)
10624 return std::nullopt;
10625
10626 Register DestReg = DestSrc->Destination->getReg();
10627 Register SrcReg = DestSrc->Source->getReg();
10628
10629 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10630
10631 // If the described register is the destination, just return the source.
10632 if (DestReg == DescribedReg)
10633 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10634
10635 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10636 if (MI.getOpcode() == AArch64::ORRWrs &&
10637 TRI->isSuperRegister(DestReg, DescribedReg))
10638 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10639
10640 // We may need to describe the lower part of a ORRXrs move.
10641 if (MI.getOpcode() == AArch64::ORRXrs &&
10642 TRI->isSubRegister(DestReg, DescribedReg)) {
10643 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10644 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10645 }
10646
10647 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10648 "Unhandled ORR[XW]rs copy case");
10649
10650 return std::nullopt;
10651}
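// Illustrative queries against the helper above:
//   ORRWrs w0, wzr, w1, #0  described for x0 -> value of w1 (the w->w copy
//                                               zero-extends into x0)
//   ORRXrs x0, xzr, x1, #0  described for w0 -> value of w1 (sub_32 of x1)
//   ORRXrs x0, xzr, x1, #0  described for x0 -> value of x1 (plain copy)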
10652
10653bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10654 // Functions cannot be split to different sections on AArch64 if they have
10655 // a red zone. This is because relaxing a cross-section branch may require
10656 // incrementing the stack pointer to spill a register, which would overwrite
10657 // the red zone.
10658 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10659 return false;
10660
10661 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10662}
10663
10664bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10665 const MachineBasicBlock &MBB) const {
10666 // Asm Goto blocks can contain conditional branches to goto labels, which can
10667 // get moved out of range of the branch instruction.
10668 auto isAsmGoto = [](const MachineInstr &MI) {
10669 return MI.getOpcode() == AArch64::INLINEASM_BR;
10670 };
10671 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10672 return false;
10673
10674 // Because jump tables are label-relative instead of table-relative, they all
10675 // must be in the same section or relocation fixup handling will fail.
10676
10677 // Check if MBB is a jump table target
10678 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10679 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10680 return llvm::is_contained(JTE.MBBs, &MBB);
10681 };
10682 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10683 return false;
10684
10685 // Check if MBB contains a jump table lookup
10686 for (const MachineInstr &MI : MBB) {
10687 switch (MI.getOpcode()) {
10688 case TargetOpcode::G_BRJT:
10689 case AArch64::JumpTableDest32:
10690 case AArch64::JumpTableDest16:
10691 case AArch64::JumpTableDest8:
10692 return false;
10693 default:
10694 continue;
10695 }
10696 }
10697
10698 // MBB isn't a special case, so it's safe to be split to the cold section.
10699 return true;
10700}
10701
10702std::optional<ParamLoadedValue>
10703AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10704 Register Reg) const {
10705 const MachineFunction *MF = MI.getMF();
10706 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10707 switch (MI.getOpcode()) {
10708 case AArch64::MOVZWi:
10709 case AArch64::MOVZXi: {
10710 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10711 // 64-bit parameters, so we need to consider super-registers.
10712 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10713 return std::nullopt;
10714
10715 if (!MI.getOperand(1).isImm())
10716 return std::nullopt;
10717 int64_t Immediate = MI.getOperand(1).getImm();
10718 int Shift = MI.getOperand(2).getImm();
10719 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10720 nullptr);
10721 }
10722 case AArch64::ORRWrs:
10723 case AArch64::ORRXrs:
10724 return describeORRLoadedValue(MI, Reg, this, TRI);
10725 }
10726
10727 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10728}
10729
10730bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10731 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10732 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10733 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10734 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10735
10736 // Anyexts are nops.
10737 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10738 return true;
10739
10740 Register DefReg = ExtMI.getOperand(0).getReg();
10741 if (!MRI.hasOneNonDBGUse(DefReg))
10742 return false;
10743
10744 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10745 // addressing mode.
10746 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10747 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10748}
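// In gMIR terms (illustrative), a single-use extend feeding a pointer add
//   %off:_(s64) = G_ZEXT %idx:_(s32)
//   %ptr:_(p0)  = G_PTR_ADD %base, %off
// reports true here, since it is expected to fold into the addressing mode;
// G_ANYEXT always reports true.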
10749
10750uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10751 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10752}
10753
10754bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10755 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10756}
10757
10758bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10759 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10760}
10761
10762unsigned int
10763AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10764 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10765}
10766
10767bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10768 unsigned Scale) const {
10769 if (Offset && Scale)
10770 return false;
10771
10772 // Check Reg + Imm
10773 if (!Scale) {
10774 // 9-bit signed offset
10775 if (isInt<9>(Offset))
10776 return true;
10777
10778 // 12-bit unsigned offset
10779 unsigned Shift = Log2_64(NumBytes);
10780 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10781 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10782 (Offset >> Shift) << Shift == Offset)
10783 return true;
10784 return false;
10785 }
10786
10787 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10788 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10789}
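// Worked examples for the checks above (NumBytes is the access size):
//   NumBytes=8, Offset=-256,  Scale=0 -> true  (fits the 9-bit signed range)
//   NumBytes=8, Offset=32760, Scale=0 -> true  (32760 = 4095 * 8, scaled 12-bit)
//   NumBytes=8, Offset=300,   Scale=0 -> false (out of 9-bit range, not a
//                                               multiple of 8)
//   NumBytes=8, Offset=0,     Scale=8 -> true  (reg1 + 8 * reg2)
//   A non-zero Offset combined with a non-zero Scale is always rejected.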
10790
10791unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10792 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10793 return AArch64::BLRNoIP;
10794 else
10795 return AArch64::BLR;
10796}
10797
10798MachineBasicBlock::iterator
10799AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10800 Register TargetReg, bool FrameSetup) const {
10801 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10802
10803 MachineBasicBlock &MBB = *MBBI->getParent();
10804 MachineFunction &MF = *MBB.getParent();
10805 const AArch64InstrInfo *TII =
10806 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10807 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10808 DebugLoc DL = MBB.findDebugLoc(MBBI);
10809
10810 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10811 MachineBasicBlock *LoopTestMBB =
10812 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10813 MF.insert(MBBInsertPoint, LoopTestMBB);
10814 MachineBasicBlock *LoopBodyMBB =
10815 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10816 MF.insert(MBBInsertPoint, LoopBodyMBB);
10817 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10818 MF.insert(MBBInsertPoint, ExitMBB);
10819 MachineInstr::MIFlag Flags =
10820 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10821
10822 // LoopTest:
10823 // SUB SP, SP, #ProbeSize
10824 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10825 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10826
10827 // CMP SP, TargetReg
10828 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10829 AArch64::XZR)
10830 .addReg(AArch64::SP)
10831 .addReg(TargetReg)
10832 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10833 .setMIFlags(Flags);
10834
10835 // B.<Cond> LoopExit
10836 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10837 .addImm(AArch64CC::LE)
10838 .addMBB(ExitMBB)
10839 .setMIFlags(Flags);
10840
10841 // STR XZR, [SP]
10842 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10843 .addReg(AArch64::XZR)
10844 .addReg(AArch64::SP)
10845 .addImm(0)
10846 .setMIFlags(Flags);
10847
10848 // B loop
10849 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10850 .addMBB(LoopTestMBB)
10851 .setMIFlags(Flags);
10852
10853 // LoopExit:
10854 // MOV SP, TargetReg
10855 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10856 .addReg(TargetReg)
10857 .addImm(0)
10858 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
10859 .setMIFlags(Flags);
10860
10861 // LDR XZR, [SP]
10862 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10863 .addReg(AArch64::XZR, RegState::Define)
10864 .addReg(AArch64::SP)
10865 .addImm(0)
10866 .setMIFlags(Flags);
10867
10868 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10869 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10870
10871 LoopTestMBB->addSuccessor(ExitMBB);
10872 LoopTestMBB->addSuccessor(LoopBodyMBB);
10873 LoopBodyMBB->addSuccessor(LoopTestMBB);
10874 MBB.addSuccessor(LoopTestMBB);
10875
10876 // Update liveins.
10877 if (MF.getRegInfo().reservedRegsFrozen())
10878 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10879
10880 return ExitMBB->begin();
10881}
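// Shape of the probing sequence constructed above (illustrative assembly;
// ProbeSize is the per-function stack-probe size):
//
//   LoopTest:  sub  sp, sp, #ProbeSize
//              cmp  sp, <TargetReg>
//              b.le LoopExit            // stop once SP has reached TargetReg
//   LoopBody:  str  xzr, [sp]           // probe the newly allocated page
//              b    LoopTest
//   LoopExit:  mov  sp, <TargetReg>
//              ldr  xzr, [sp]           // probe the final page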
10882
10883namespace {
10884class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10885 MachineFunction *MF;
10886 const TargetInstrInfo *TII;
10887 const TargetRegisterInfo *TRI;
10888 MachineRegisterInfo &MRI;
10889
10890 /// The block of the loop
10891 MachineBasicBlock *LoopBB;
10892 /// The conditional branch of the loop
10893 MachineInstr *CondBranch;
10894 /// The compare instruction for loop control
10895 MachineInstr *Comp;
10896 /// The number of the operand of the loop counter value in Comp
10897 unsigned CompCounterOprNum;
10898 /// The instruction that updates the loop counter value
10899 MachineInstr *Update;
10900 /// The number of the operand of the loop counter value in Update
10901 unsigned UpdateCounterOprNum;
10902 /// The initial value of the loop counter
10903 Register Init;
10904 /// True iff Update is a predecessor of Comp
10905 bool IsUpdatePriorComp;
10906
10907 /// The normalized condition used by createTripCountGreaterCondition()
10908 SmallVector<MachineOperand, 4> Cond;
10909
10910public:
10911 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10912 MachineInstr *Comp, unsigned CompCounterOprNum,
10913 MachineInstr *Update, unsigned UpdateCounterOprNum,
10914 Register Init, bool IsUpdatePriorComp,
10915 const SmallVectorImpl<MachineOperand> &Cond)
10916 : MF(Comp->getParent()->getParent()),
10917 TII(MF->getSubtarget().getInstrInfo()),
10918 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10919 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10920 CompCounterOprNum(CompCounterOprNum), Update(Update),
10921 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10922 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10923
10924 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10925 // Ensure the loop-control instructions are placed in stage 0.
10926 // The predecessors of Comp are considered by the caller.
10927 return MI == Comp;
10928 }
10929
10930 std::optional<bool> createTripCountGreaterCondition(
10931 int TC, MachineBasicBlock &MBB,
10932 SmallVectorImpl<MachineOperand> &CondParam) override {
10933 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10934 // Cond is normalized for such use.
10935 // The predecessors of the branch are assumed to have already been inserted.
10936 CondParam = Cond;
10937 return {};
10938 }
10939
10940 void createRemainingIterationsGreaterCondition(
10941 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10942 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10943
10944 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10945
10946 void adjustTripCount(int TripCountAdjust) override {}
10947
10948 bool isMVEExpanderSupported() override { return true; }
10949};
10950} // namespace
10951
10952/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10953/// is replaced by ReplaceReg. The output register is newly created.
10954/// The other operands are unchanged from MI.
10955static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10956 Register ReplaceReg, MachineBasicBlock &MBB,
10957 MachineBasicBlock::iterator InsertTo) {
10958 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10959 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10960 const TargetRegisterInfo *TRI =
10961 MBB.getParent()->getSubtarget().getRegisterInfo();
10962 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
10963 Register Result = 0;
10964 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10965 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10966 Result = MRI.createVirtualRegister(
10967 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10968 NewMI->getOperand(I).setReg(Result);
10969 } else if (I == ReplaceOprNum) {
10970 MRI.constrainRegClass(ReplaceReg,
10971 TII->getRegClass(NewMI->getDesc(), I, TRI));
10972 NewMI->getOperand(I).setReg(ReplaceReg);
10973 }
10974 }
10975 MBB.insert(InsertTo, NewMI);
10976 return Result;
10977}
10978
10979void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10980 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10981 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
10982 // Create and accumulate conditions for next TC iterations.
10983 // Example:
10984 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10985 // # iteration of the kernel
10986 //
10987 // # insert the following instructions
10988 // cond = CSINCXr 0, 0, C, implicit $nzcv
10989 // counter = ADDXri counter, 1 # clone from this->Update
10990 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10991 // cond = CSINCXr cond, cond, C, implicit $nzcv
10992 // ... (repeat TC times)
10993 // SUBSXri cond, 0, implicit-def $nzcv
10994
10995 assert(CondBranch->getOpcode() == AArch64::Bcc);
10996 // CondCode to exit the loop
10997 AArch64CC::CondCode CC =
10998 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10999 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11000 CC = AArch64CC::getInvertedCondCode(CC);
11001
11002 // Accumulate conditions to exit the loop
11003 Register AccCond = AArch64::XZR;
11004
11005 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11006 auto AccumulateCond = [&](Register CurCond,
11007 AArch64CC::CondCode CC) {
11008 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11009 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11010 .addReg(NewCond, RegState::Define)
11011 .addReg(CurCond)
11012 .addReg(CurCond)
11013 .addImm(AArch64CC::getInvertedCondCode(CC));
11014 return NewCond;
11015 };
11016
11017 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11018 // Update and Comp for I==0 already exist in MBB
11019 // (MBB is an unrolled kernel)
11020 Register Counter;
11021 for (int I = 0; I <= TC; ++I) {
11022 Register NextCounter;
11023 if (I != 0)
11024 NextCounter =
11025 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11026
11027 AccCond = AccumulateCond(AccCond, CC);
11028
11029 if (I != TC) {
11030 if (I == 0) {
11031 if (Update != Comp && IsUpdatePriorComp) {
11032 Counter =
11033 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11034 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11035 MBB.end());
11036 } else {
11037 // can use already calculated value
11038 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11039 }
11040 } else if (Update != Comp) {
11041 NextCounter =
11042 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11043 }
11044 }
11045 Counter = NextCounter;
11046 }
11047 } else {
11048 Register Counter;
11049 if (LastStage0Insts.empty()) {
11050 // use initial counter value (testing if the trip count is sufficient to
11051 // be executed by pipelined code)
11052 Counter = Init;
11053 if (IsUpdatePriorComp)
11054 Counter =
11055 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11056 } else {
11057 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11058 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11059 }
11060
11061 for (int I = 0; I <= TC; ++I) {
11062 Register NextCounter;
11063 NextCounter =
11064 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11065 AccCond = AccumulateCond(AccCond, CC);
11066 if (I != TC && Update != Comp)
11067 NextCounter =
11068 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11069 Counter = NextCounter;
11070 }
11071 }
11072
11073 // If AccCond == 0, the remainder is greater than TC.
11074 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11075 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11076 .addReg(AccCond)
11077 .addImm(0)
11078 .addImm(0);
11079 Cond.clear();
11080 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11081}
11082
11083static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11084 Register &RegMBB, Register &RegOther) {
11085 assert(Phi.getNumOperands() == 5);
11086 if (Phi.getOperand(2).getMBB() == MBB) {
11087 RegMBB = Phi.getOperand(1).getReg();
11088 RegOther = Phi.getOperand(3).getReg();
11089 } else {
11090 assert(Phi.getOperand(4).getMBB() == MBB);
11091 RegMBB = Phi.getOperand(3).getReg();
11092 RegOther = Phi.getOperand(1).getReg();
11093 }
11094}
11095
11096static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11097 if (!Reg.isVirtual())
11098 return false;
11099 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11100 return MRI.getVRegDef(Reg)->getParent() != BB;
11101}
11102
11103/// If Reg is an induction variable, return true and set some parameters
11104static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11105 MachineInstr *&UpdateInst,
11106 unsigned &UpdateCounterOprNum, Register &InitReg,
11107 bool &IsUpdatePriorComp) {
11108 // Example:
11109 //
11110 // Preheader:
11111 // InitReg = ...
11112 // LoopBB:
11113 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11114 // Reg = COPY Reg0 ; COPY is ignored.
11115 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11116 // ; Reg is the value calculated in the previous
11117 // ; iteration, so IsUpdatePriorComp == false.
11118
11119 if (LoopBB->pred_size() != 2)
11120 return false;
11121 if (!Reg.isVirtual())
11122 return false;
11123 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11124 UpdateInst = nullptr;
11125 UpdateCounterOprNum = 0;
11126 InitReg = 0;
11127 IsUpdatePriorComp = true;
11128 Register CurReg = Reg;
11129 while (true) {
11130 MachineInstr *Def = MRI.getVRegDef(CurReg);
11131 if (Def->getParent() != LoopBB)
11132 return false;
11133 if (Def->isCopy()) {
11134 // Ignore copy instructions unless they contain subregisters
11135 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11136 return false;
11137 CurReg = Def->getOperand(1).getReg();
11138 } else if (Def->isPHI()) {
11139 if (InitReg != 0)
11140 return false;
11141 if (!UpdateInst)
11142 IsUpdatePriorComp = false;
11143 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11144 } else {
11145 if (UpdateInst)
11146 return false;
11147 switch (Def->getOpcode()) {
11148 case AArch64::ADDSXri:
11149 case AArch64::ADDSWri:
11150 case AArch64::SUBSXri:
11151 case AArch64::SUBSWri:
11152 case AArch64::ADDXri:
11153 case AArch64::ADDWri:
11154 case AArch64::SUBXri:
11155 case AArch64::SUBWri:
11156 UpdateInst = Def;
11157 UpdateCounterOprNum = 1;
11158 break;
11159 case AArch64::ADDSXrr:
11160 case AArch64::ADDSWrr:
11161 case AArch64::SUBSXrr:
11162 case AArch64::SUBSWrr:
11163 case AArch64::ADDXrr:
11164 case AArch64::ADDWrr:
11165 case AArch64::SUBXrr:
11166 case AArch64::SUBWrr:
11167 UpdateInst = Def;
11168 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11169 UpdateCounterOprNum = 1;
11170 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11171 UpdateCounterOprNum = 2;
11172 else
11173 return false;
11174 break;
11175 default:
11176 return false;
11177 }
11178 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11179 }
11180
11181 if (!CurReg.isVirtual())
11182 return false;
11183 if (Reg == CurReg)
11184 break;
11185 }
11186
11187 if (!UpdateInst)
11188 return false;
11189
11190 return true;
11191}
11192
11193std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11194AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11195 // Accept loops that meet the following conditions
11196 // * The conditional branch is BCC
11197 // * The compare instruction is ADDS/SUBS/WHILEXX
11198 // * One operand of the compare is an induction variable and the other is a
11199 // loop invariant value
11200 // * The induction variable is incremented/decremented by a single instruction
11201 // * Does not contain CALL or instructions which have unmodeled side effects
11202
11203 for (MachineInstr &MI : *LoopBB)
11204 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11205 // This instruction may use NZCV, which interferes with the instruction to
11206 // be inserted for loop control.
11207 return nullptr;
11208
11209 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11210 SmallVector<MachineOperand, 4> Cond;
11211 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11212 return nullptr;
11213
11214 // Infinite loops are not supported
11215 if (TBB == LoopBB && FBB == LoopBB)
11216 return nullptr;
11217
11218 // Must be conditional branch
11219 if (TBB != LoopBB && FBB == nullptr)
11220 return nullptr;
11221
11222 assert((TBB == LoopBB || FBB == LoopBB) &&
11223 "The Loop must be a single-basic-block loop");
11224
11225 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11226 const TargetRegisterInfo &TRI = getRegisterInfo();
11227
11228 if (CondBranch->getOpcode() != AArch64::Bcc)
11229 return nullptr;
11230
11231 // Normalization for createTripCountGreaterCondition()
11232 if (TBB == LoopBB)
11233 reverseBranchCondition(Cond);
11234
11235 MachineInstr *Comp = nullptr;
11236 unsigned CompCounterOprNum = 0;
11237 for (MachineInstr &MI : reverse(*LoopBB)) {
11238 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11239 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11240 // operands is a loop invariant value
11241
11242 switch (MI.getOpcode()) {
11243 case AArch64::SUBSXri:
11244 case AArch64::SUBSWri:
11245 case AArch64::ADDSXri:
11246 case AArch64::ADDSWri:
11247 Comp = &MI;
11248 CompCounterOprNum = 1;
11249 break;
11250 case AArch64::ADDSWrr:
11251 case AArch64::ADDSXrr:
11252 case AArch64::SUBSWrr:
11253 case AArch64::SUBSXrr:
11254 Comp = &MI;
11255 break;
11256 default:
11257 if (isWhileOpcode(MI.getOpcode())) {
11258 Comp = &MI;
11259 break;
11260 }
11261 return nullptr;
11262 }
11263
11264 if (CompCounterOprNum == 0) {
11265 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11266 CompCounterOprNum = 2;
11267 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11268 CompCounterOprNum = 1;
11269 else
11270 return nullptr;
11271 }
11272 break;
11273 }
11274 }
11275 if (!Comp)
11276 return nullptr;
11277
11278 MachineInstr *Update = nullptr;
11279 Register Init;
11280 bool IsUpdatePriorComp;
11281 unsigned UpdateCounterOprNum;
11282 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11283 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11284 return nullptr;
11285
11286 return std::make_unique<AArch64PipelinerLoopInfo>(
11287 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11288 Init, IsUpdatePriorComp, Cond);
11289}
11290
11291/// verifyInstruction - Perform target specific instruction verification.
11292bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11293 StringRef &ErrInfo) const {
11294 // Verify that immediate offsets on load/store instructions are within range.
11295 // Stack objects with an FI operand are excluded as they can be fixed up
11296 // during PEI.
11297 TypeSize Scale(0U, false), Width(0U, false);
11298 int64_t MinOffset, MaxOffset;
11299 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11300 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11301 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11302 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11303 if (Imm < MinOffset || Imm > MaxOffset) {
11304 ErrInfo = "Unexpected immediate on load/store instruction";
11305 return false;
11306 }
11307 }
11308 }
11309
11310 const MCInstrDesc &MCID = MI.getDesc();
11311 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11312 const MachineOperand &MO = MI.getOperand(Op);
11313 switch (MCID.operands()[Op].OperandType) {
11314 case AArch64::OPERAND_IMPLICIT_IMM_0:
11315 if (!MO.isImm() || MO.getImm() != 0) {
11316 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11317 return false;
11318 }
11319 break;
11320 case AArch64::OPERAND_SHIFT_MSL:
11321 if (!MO.isImm() ||
11322 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11323 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11324 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11325 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11326 return false;
11327 }
11328 break;
11329 default:
11330 break;
11331 }
11332 }
11333 return true;
11334}
11335
11336#define GET_INSTRINFO_HELPERS
11337#define GET_INSTRMAP_INFO
11338#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
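The MachineFrameInfo accessors above answer the usual questions about a frame object. A small hedged sketch of such a query; the helper name describeFrameObject and the frame index FI are placeholders.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

// Sketch: inspect a frame object's size, alignment and placement.
static void describeFrameObject(const llvm::MachineFunction &MF, int FI) {
  const llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
  int64_t Size = MFI.getObjectSize(FI);      // object size in bytes
  llvm::Align A = MFI.getObjectAlign(FI);    // stack slot alignment
  int64_t Off = MFI.getObjectOffset(FI);     // offset from the incoming SP
  bool Fixed = MFI.isFixedObjectIndex(FI);   // argument/CSR area object?
  (void)Size; (void)A; (void)Off; (void)Fixed;
}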
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
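These MachineInstrBuilder helpers chain onto BuildMI. A hedged sketch that emits an AArch64 register-plus-immediate add; the helper name emitAddImm, the chosen registers, and the immediate are illustrative.

#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Sketch: emit "add x0, x1, #16" before MBBI, flagged as frame setup.
static void emitAddImm(llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator MBBI,
                       const llvm::DebugLoc &DL,
                       const llvm::TargetInstrInfo *TII) {
  llvm::BuildMI(MBB, MBBI, DL, TII->get(llvm::AArch64::ADDXri),
                llvm::AArch64::X0)
      .addReg(llvm::AArch64::X1)
      .addImm(16) // unsigned 12-bit immediate
      .addImm(0)  // LSL #0 shift on the immediate
      .setMIFlag(llvm::MachineInstr::FrameSetup);
}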
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
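The MachineInstr queries above are the building blocks of the pattern checks in this file. A hedged sketch of a typical opcode/operand test; the helper name isUnshiftedAddImm is hypothetical.

#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

// Sketch: recognize an ADDXri whose immediate is not shifted and return it.
static bool isUnshiftedAddImm(const llvm::MachineInstr &MI, int64_t &Imm) {
  if (MI.getOpcode() != llvm::AArch64::ADDXri)
    return false;
  if (!MI.getOperand(2).isImm() || MI.getOperand(3).getImm() != 0)
    return false;
  Imm = MI.getOperand(2).getImm();
  return true;
}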
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
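StackOffset keeps the fixed and vscale-scaled byte components separate, which is how SVE frame objects are addressed. A small sketch using the factory functions above; the concrete byte counts and the helper name exampleOffset are arbitrary.

#include "llvm/Support/TypeSize.h"

// Sketch: combine a 16-byte fixed slot with 32 scalable bytes (32 * vscale).
static llvm::StackOffset exampleOffset() {
  llvm::StackOffset Off =
      llvm::StackOffset::getFixed(16) + llvm::StackOffset::getScalable(32);
  // Off.getFixed() == 16, Off.getScalable() == 32.
  return Off;
}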
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
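The immediate helpers above cover the two AArch64 immediate paths used in this file: bitmask ("logical") immediates and MOV-sequence expansion. A hedged sketch follows; the constants are illustrative, and the AArch64_IMM namespace is assumed to match the AArch64ExpandImm.h header this file includes.

#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"

static void immediateExamples() {
  // 0x00ff00ff00ff00ff repeats an 8-bit run in 16-bit elements, so it is a
  // valid bitmask immediate for 64-bit logical instructions.
  uint64_t Enc =
      llvm::AArch64_AM::encodeLogicalImmediate(0x00ff00ff00ff00ffULL, 64);
  uint64_t Val = llvm::AArch64_AM::decodeLogicalImmediate(Enc, 64);
  (void)Val; // round-trips back to the original constant

  // Arbitrary constants go through a MOVZ/MOVK/MOVN/ORR sequence instead.
  llvm::SmallVector<llvm::AArch64_IMM::ImmInsnModel, 4> Insn;
  llvm::AArch64_IMM::expandMOVImm(0x123456789abcdef0ULL, 64, Insn);
}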
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
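emitFrameOffset is the usual way prologue/epilogue code materializes SP adjustments, splitting the StackOffset into whatever ADD/SUB (and scalable ADDVL-style) sequence is needed. A hedged sketch of a 48-byte allocation; the helper name allocateStack and the size are illustrative.

#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

// Sketch: "sub sp, sp, #48" (possibly split into several instructions),
// flagged as frame setup.
static void allocateStack(llvm::MachineBasicBlock &MBB,
                          llvm::MachineBasicBlock::iterator MBBI,
                          const llvm::DebugLoc &DL,
                          const llvm::TargetInstrInfo *TII) {
  llvm::emitFrameOffset(MBB, MBBI, DL, llvm::AArch64::SP, llvm::AArch64::SP,
                        llvm::StackOffset::getFixed(-48), TII,
                        llvm::MachineInstr::FrameSetup);
}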
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2100
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
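isAArch64FrameOffsetLegal and rewriteAArch64FrameIndex work together: the first classifies an offset against the status flags listed above, and the second folds as much of it into MI as the addressing mode allows. A hedged sketch; the helper name tryFoldOffset and its parameters are placeholders, and the comments describe the behavior only as documented here.

#include "AArch64InstrInfo.h"

// Sketch: try to fold Offset into MI's frame-index operand against SP.
static bool tryFoldOffset(llvm::MachineInstr &MI, unsigned FrameRegIdx,
                          llvm::StackOffset Offset,
                          const llvm::AArch64InstrInfo *TII) {
  int Status = llvm::isAArch64FrameOffsetLegal(MI, Offset);
  if (!(Status & llvm::AArch64FrameOffsetCanUpdate))
    return false; // offset cannot be applied to this instruction at all
  // Rewrites MI in place; Offset is updated to any unabsorbed remainder,
  // which the caller would have to materialize separately.
  return llvm::rewriteAArch64FrameIndex(MI, FrameRegIdx, llvm::AArch64::SP,
                                        Offset, TII);
}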
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.