1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69    CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
72static cl::opt<unsigned> TBZDisplacementBits(
73    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
76static cl::opt<unsigned> CBZDisplacementBits(
77    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94    : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may be. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101  const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(*MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134  // Size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // Specific cases handle instructions of variable sizes
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199    Size += getInstSizeInBytes(*I);
200  }
201 return Size;
202}
203
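// The Cond vector built here is a target-specific encoding consumed by the
// branch hooks below: for Bcc, Cond[0] holds the AArch64CC condition code;
// for the folded compare-and-branch forms, Cond[0] is -1, Cond[1] is the
// branch opcode, and the remaining entries are the instruction's non-label
// operands.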
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205                            SmallVectorImpl<MachineOperand> &Cond) {
206  // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 case AArch64::CBBAssertExt:
245 case AArch64::CBHAssertExt:
246 Target = LastInst->getOperand(3).getMBB();
247 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
248 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
249 Cond.push_back(LastInst->getOperand(0)); // Cond
250 Cond.push_back(LastInst->getOperand(1)); // Op0
251 Cond.push_back(LastInst->getOperand(2)); // Op1
252 Cond.push_back(LastInst->getOperand(4)); // Ext0
253 Cond.push_back(LastInst->getOperand(5)); // Ext1
254 break;
255 }
256}
257
258static unsigned getBranchDisplacementBits(unsigned Opc) {
259 switch (Opc) {
260 default:
261 llvm_unreachable("unexpected opcode!");
262 case AArch64::B:
263 return BDisplacementBits;
264 case AArch64::TBNZW:
265 case AArch64::TBZW:
266 case AArch64::TBNZX:
267 case AArch64::TBZX:
268 return TBZDisplacementBits;
269 case AArch64::CBNZW:
270 case AArch64::CBZW:
271 case AArch64::CBNZX:
272 case AArch64::CBZX:
273 return CBZDisplacementBits;
274 case AArch64::Bcc:
275 return BCCDisplacementBits;
276 case AArch64::CBWPri:
277 case AArch64::CBXPri:
278 case AArch64::CBBAssertExt:
279 case AArch64::CBHAssertExt:
280 case AArch64::CBWPrr:
281 case AArch64::CBXPrr:
282 return CBDisplacementBits;
283 }
284}
285
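// Displacements are encoded in units of 4-byte instructions (note the
// BrOffset / 4 below), so a branch with N displacement bits reaches roughly
// +/-2^(N+1) bytes: about +/-32KiB for TB[N]Z (14 bits), +/-1MiB for Bcc and
// CB[N]Z (19 bits), and +/-128MiB for an unconditional B (26 bits).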
286bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
287                                             int64_t BrOffset) const {
288 unsigned Bits = getBranchDisplacementBits(BranchOp);
289 assert(Bits >= 3 && "max branch displacement must be enough to jump"
290 "over conditional branch expansion");
291 return isIntN(Bits, BrOffset / 4);
292}
293
294MachineBasicBlock *
295AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
296  switch (MI.getOpcode()) {
297 default:
298 llvm_unreachable("unexpected opcode!");
299 case AArch64::B:
300 return MI.getOperand(0).getMBB();
301 case AArch64::TBZW:
302 case AArch64::TBNZW:
303 case AArch64::TBZX:
304 case AArch64::TBNZX:
305 return MI.getOperand(2).getMBB();
306 case AArch64::CBZW:
307 case AArch64::CBNZW:
308 case AArch64::CBZX:
309 case AArch64::CBNZX:
310 case AArch64::Bcc:
311 return MI.getOperand(1).getMBB();
312 case AArch64::CBWPri:
313 case AArch64::CBXPri:
314 case AArch64::CBBAssertExt:
315 case AArch64::CBHAssertExt:
316 case AArch64::CBWPrr:
317 case AArch64::CBXPrr:
318 return MI.getOperand(3).getMBB();
319 }
320}
321
322void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
323                                            MachineBasicBlock &NewDestBB,
324 MachineBasicBlock &RestoreBB,
325 const DebugLoc &DL,
326 int64_t BrOffset,
327 RegScavenger *RS) const {
328 assert(RS && "RegScavenger required for long branching");
329 assert(MBB.empty() &&
330 "new block should be inserted for expanding unconditional branch");
331 assert(MBB.pred_size() == 1);
332 assert(RestoreBB.empty() &&
333 "restore block should be inserted for restoring clobbered registers");
334
335 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
336 // Offsets outside of the signed 33-bit range are not supported for ADRP +
337 // ADD.
338 if (!isInt<33>(BrOffset))
339      report_fatal_error(
340          "Branch offsets outside of the signed 33-bit range not supported");
341
342 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
343 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
344 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
345 .addReg(Reg)
346 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
347 .addImm(0);
348 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
349 };
350
351 RS->enterBasicBlockEnd(MBB);
352 // If X16 is unused, we can rely on the linker to insert a range extension
353 // thunk if NewDestBB is out of range of a single B instruction.
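  // X16 is IP0, the intra-procedure-call scratch register that linker-inserted
  // range-extension thunks are themselves permitted to clobber.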
354 constexpr Register Reg = AArch64::X16;
355 if (!RS->isRegUsed(Reg)) {
356 insertUnconditionalBranch(MBB, &NewDestBB, DL);
357 RS->setRegUsed(Reg);
358 return;
359 }
360
361 // If there's a free register and it's worth inflating the code size,
362 // manually insert the indirect branch.
363 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
364 if (Scavenged != AArch64::NoRegister &&
365 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
366 buildIndirectBranch(Scavenged, NewDestBB);
367 RS->setRegUsed(Scavenged);
368 return;
369 }
370
371 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
372 // with red zones.
373 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
374 if (!AFI || AFI->hasRedZone().value_or(true))
375    report_fatal_error(
376        "Unable to insert indirect branch inside function that has red zone");
377
378 // Otherwise, spill X16 and defer range extension to the linker.
379 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
380 .addReg(AArch64::SP, RegState::Define)
381 .addReg(Reg)
382 .addReg(AArch64::SP)
383 .addImm(-16);
384
385 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
386
387 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
388 .addReg(AArch64::SP, RegState::Define)
389      .addReg(Reg, RegState::Define)
390      .addReg(AArch64::SP)
391 .addImm(16);
392}
393
394// Branch analysis.
395bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
396                                     MachineBasicBlock *&TBB,
397                                     MachineBasicBlock *&FBB,
398                                     SmallVectorImpl<MachineOperand> &Cond,
399                                     bool AllowModify) const {
400 // If the block has no terminators, it just falls into the block after it.
401 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
402 if (I == MBB.end())
403 return false;
404
405 // Skip over SpeculationBarrierEndBB terminators
406 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
407 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
408 --I;
409 }
410
411 if (!isUnpredicatedTerminator(*I))
412 return false;
413
414 // Get the last instruction in the block.
415 MachineInstr *LastInst = &*I;
416
417 // If there is only one terminator instruction, process it.
418 unsigned LastOpc = LastInst->getOpcode();
419 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
420 if (isUncondBranchOpcode(LastOpc)) {
421 TBB = LastInst->getOperand(0).getMBB();
422 return false;
423 }
424 if (isCondBranchOpcode(LastOpc)) {
425 // Block ends with fall-through condbranch.
426 parseCondBranch(LastInst, TBB, Cond);
427 return false;
428 }
429 return true; // Can't handle indirect branch.
430 }
431
432 // Get the instruction before it if it is a terminator.
433 MachineInstr *SecondLastInst = &*I;
434 unsigned SecondLastOpc = SecondLastInst->getOpcode();
435
436 // If AllowModify is true and the block ends with two or more unconditional
437 // branches, delete all but the first unconditional branch.
438 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
439 while (isUncondBranchOpcode(SecondLastOpc)) {
440 LastInst->eraseFromParent();
441 LastInst = SecondLastInst;
442 LastOpc = LastInst->getOpcode();
443 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
444        // Return now; the only terminator is an unconditional branch.
445 TBB = LastInst->getOperand(0).getMBB();
446 return false;
447 }
448 SecondLastInst = &*I;
449 SecondLastOpc = SecondLastInst->getOpcode();
450 }
451 }
452
453  // If we're allowed to modify and the block ends in an unconditional branch
454 // which could simply fallthrough, remove the branch. (Note: This case only
455 // matters when we can't understand the whole sequence, otherwise it's also
456 // handled by BranchFolding.cpp.)
457 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
458 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
459 LastInst->eraseFromParent();
460 LastInst = SecondLastInst;
461 LastOpc = LastInst->getOpcode();
462 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
463 assert(!isUncondBranchOpcode(LastOpc) &&
464 "unreachable unconditional branches removed above");
465
466 if (isCondBranchOpcode(LastOpc)) {
467 // Block ends with fall-through condbranch.
468 parseCondBranch(LastInst, TBB, Cond);
469 return false;
470 }
471 return true; // Can't handle indirect branch.
472 }
473 SecondLastInst = &*I;
474 SecondLastOpc = SecondLastInst->getOpcode();
475 }
476
477 // If there are three terminators, we don't know what sort of block this is.
478 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
479 return true;
480
481 // If the block ends with a B and a Bcc, handle it.
482 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
483 parseCondBranch(SecondLastInst, TBB, Cond);
484 FBB = LastInst->getOperand(0).getMBB();
485 return false;
486 }
487
488 // If the block ends with two unconditional branches, handle it. The second
489 // one is not executed, so remove it.
490 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
491 TBB = SecondLastInst->getOperand(0).getMBB();
492 I = LastInst;
493 if (AllowModify)
494 I->eraseFromParent();
495 return false;
496 }
497
498 // ...likewise if it ends with an indirect branch followed by an unconditional
499 // branch.
500 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
501 I = LastInst;
502 if (AllowModify)
503 I->eraseFromParent();
504 return true;
505 }
506
507 // Otherwise, can't handle this.
508 return true;
509}
510
511bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
512                                              MachineBranchPredicate &MBP,
513 bool AllowModify) const {
514 // For the moment, handle only a block which ends with a cb(n)zx followed by
515 // a fallthrough. Why this? Because it is a common form.
516 // TODO: Should we handle b.cc?
517
518 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
519 if (I == MBB.end())
520 return true;
521
522 // Skip over SpeculationBarrierEndBB terminators
523 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
524 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
525 --I;
526 }
527
528 if (!isUnpredicatedTerminator(*I))
529 return true;
530
531 // Get the last instruction in the block.
532 MachineInstr *LastInst = &*I;
533 unsigned LastOpc = LastInst->getOpcode();
534 if (!isCondBranchOpcode(LastOpc))
535 return true;
536
537 switch (LastOpc) {
538 default:
539 return true;
540 case AArch64::CBZW:
541 case AArch64::CBZX:
542 case AArch64::CBNZW:
543 case AArch64::CBNZX:
544 break;
545 };
546
547 MBP.TrueDest = LastInst->getOperand(1).getMBB();
548 assert(MBP.TrueDest && "expected!");
549 MBP.FalseDest = MBB.getNextNode();
550
551 MBP.ConditionDef = nullptr;
552 MBP.SingleUseCondition = false;
553
554 MBP.LHS = LastInst->getOperand(0);
555 MBP.RHS = MachineOperand::CreateImm(0);
556 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
557 ? MachineBranchPredicate::PRED_NE
558 : MachineBranchPredicate::PRED_EQ;
559 return false;
560}
561
562bool AArch64InstrInfo::reverseBranchCondition(
563    SmallVectorImpl<MachineOperand> &Cond) const {
564  if (Cond[0].getImm() != -1) {
565 // Regular Bcc
566 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
567    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
568  } else {
569 // Folded compare-and-branch
570 switch (Cond[1].getImm()) {
571 default:
572 llvm_unreachable("Unknown conditional branch!");
573 case AArch64::CBZW:
574 Cond[1].setImm(AArch64::CBNZW);
575 break;
576 case AArch64::CBNZW:
577 Cond[1].setImm(AArch64::CBZW);
578 break;
579 case AArch64::CBZX:
580 Cond[1].setImm(AArch64::CBNZX);
581 break;
582 case AArch64::CBNZX:
583 Cond[1].setImm(AArch64::CBZX);
584 break;
585 case AArch64::TBZW:
586 Cond[1].setImm(AArch64::TBNZW);
587 break;
588 case AArch64::TBNZW:
589 Cond[1].setImm(AArch64::TBZW);
590 break;
591 case AArch64::TBZX:
592 Cond[1].setImm(AArch64::TBNZX);
593 break;
594 case AArch64::TBNZX:
595 Cond[1].setImm(AArch64::TBZX);
596 break;
597
598 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
599 case AArch64::CBWPri:
600 case AArch64::CBXPri:
601 case AArch64::CBBAssertExt:
602 case AArch64::CBHAssertExt:
603 case AArch64::CBWPrr:
604 case AArch64::CBXPrr: {
605 // Pseudos using standard 4bit Arm condition codes
606      AArch64CC::CondCode CC =
607          static_cast<AArch64CC::CondCode>(Cond[2].getImm());
608      Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
609    }
610 }
611 }
612
613 return false;
614}
615
616unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
617                                        int *BytesRemoved) const {
618 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
619 if (I == MBB.end())
620 return 0;
621
622 if (!isUncondBranchOpcode(I->getOpcode()) &&
623 !isCondBranchOpcode(I->getOpcode()))
624 return 0;
625
626 // Remove the branch.
627 I->eraseFromParent();
628
629 I = MBB.end();
630
631 if (I == MBB.begin()) {
632 if (BytesRemoved)
633 *BytesRemoved = 4;
634 return 1;
635 }
636 --I;
637 if (!isCondBranchOpcode(I->getOpcode())) {
638 if (BytesRemoved)
639 *BytesRemoved = 4;
640 return 1;
641 }
642
643 // Remove the branch.
644 I->eraseFromParent();
645 if (BytesRemoved)
646 *BytesRemoved = 8;
647
648 return 2;
649}
650
651void AArch64InstrInfo::instantiateCondBranch(
652    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
653    ArrayRef<MachineOperand> Cond) const {
654  if (Cond[0].getImm() != -1) {
655 // Regular Bcc
656 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
657 } else {
658 // Folded compare-and-branch
659 // Note that we use addOperand instead of addReg to keep the flags.
660
661 // cbz, cbnz
662 const MachineInstrBuilder MIB =
663 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
664
665 // tbz/tbnz
666 if (Cond.size() > 3)
667 MIB.add(Cond[3]);
668
669 // cb
670 if (Cond.size() > 4)
671 MIB.add(Cond[4]);
672
673 MIB.addMBB(TBB);
674
675 // cb[b,h]
676 if (Cond.size() > 5) {
677 MIB.addImm(Cond[5].getImm());
678 MIB.addImm(Cond[6].getImm());
679 }
680 }
681}
682
683unsigned AArch64InstrInfo::insertBranch(
684    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
685    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
686 // Shouldn't be a fall through.
687 assert(TBB && "insertBranch must not be told to insert a fallthrough");
688
689 if (!FBB) {
690 if (Cond.empty()) // Unconditional branch?
691 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
692 else
693 instantiateCondBranch(MBB, DL, TBB, Cond);
694
695 if (BytesAdded)
696 *BytesAdded = 4;
697
698 return 1;
699 }
700
701 // Two-way conditional branch.
702 instantiateCondBranch(MBB, DL, TBB, Cond);
703 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
704
705 if (BytesAdded)
706 *BytesAdded = 8;
707
708 return 2;
709}
710
711static bool optimizeTerminators(MachineBasicBlock *MBB,
712                                const TargetInstrInfo &TII) {
713 for (MachineInstr &MI : MBB->terminators()) {
714 unsigned Opc = MI.getOpcode();
715 switch (Opc) {
716 case AArch64::CBZW:
717 case AArch64::CBZX:
718 case AArch64::TBZW:
719 case AArch64::TBZX:
720 // CBZ/TBZ with WZR/XZR -> unconditional B
721 if (MI.getOperand(0).getReg() == AArch64::WZR ||
722 MI.getOperand(0).getReg() == AArch64::XZR) {
723 DEBUG_WITH_TYPE("optimizeTerminators",
724 dbgs() << "Removing always taken branch: " << MI);
725 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
726 SmallVector<MachineBasicBlock *> Succs(MBB->successors());
727 for (auto *S : Succs)
728 if (S != Target)
729 MBB->removeSuccessor(S);
730 DebugLoc DL = MI.getDebugLoc();
731 while (MBB->rbegin() != &MI)
732 MBB->rbegin()->eraseFromParent();
733 MI.eraseFromParent();
734 BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
735 return true;
736 }
737 break;
738 case AArch64::CBNZW:
739 case AArch64::CBNZX:
740 case AArch64::TBNZW:
741 case AArch64::TBNZX:
742 // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
743 if (MI.getOperand(0).getReg() == AArch64::WZR ||
744 MI.getOperand(0).getReg() == AArch64::XZR) {
745 DEBUG_WITH_TYPE("optimizeTerminators",
746 dbgs() << "Removing never taken branch: " << MI);
747 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
748 MI.getParent()->removeSuccessor(Target);
749 MI.eraseFromParent();
750 return true;
751 }
752 break;
753 }
754 }
755 return false;
756}
757
758// Find the original register that VReg is copied from.
759static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
760 while (Register::isVirtualRegister(VReg)) {
761 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
762 if (!DefMI->isFullCopy())
763 return VReg;
764 VReg = DefMI->getOperand(1).getReg();
765 }
766 return VReg;
767}
768
769// Determine if VReg is defined by an instruction that can be folded into a
770// csel instruction. If so, return the folded opcode, and the replacement
771// register.
772static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
773 unsigned *NewReg = nullptr) {
774 VReg = removeCopies(MRI, VReg);
775  if (!Register::isVirtualRegister(VReg))
776    return 0;
777
778 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
779 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
780 unsigned Opc = 0;
781 unsigned SrcReg = 0;
782 switch (DefMI->getOpcode()) {
783 case AArch64::SUBREG_TO_REG:
784 // Check for the following way to define an 64-bit immediate:
785 // %0:gpr32 = MOVi32imm 1
786 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
787 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
788 return 0;
789 if (!DefMI->getOperand(2).isReg())
790 return 0;
791 if (!DefMI->getOperand(3).isImm() ||
792 DefMI->getOperand(3).getImm() != AArch64::sub_32)
793 return 0;
794 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
795 if (DefMI->getOpcode() != AArch64::MOVi32imm)
796 return 0;
797 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
798 return 0;
799 assert(Is64Bit);
800 SrcReg = AArch64::XZR;
801 Opc = AArch64::CSINCXr;
802 break;
803
804 case AArch64::MOVi32imm:
805 case AArch64::MOVi64imm:
806 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
807 return 0;
808 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
809 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
810 break;
811
812 case AArch64::ADDSXri:
813 case AArch64::ADDSWri:
814 // if NZCV is used, do not fold.
815 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
816 true) == -1)
817 return 0;
818 // fall-through to ADDXri and ADDWri.
819 [[fallthrough]];
820 case AArch64::ADDXri:
821 case AArch64::ADDWri:
822 // add x, 1 -> csinc.
823 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
824 DefMI->getOperand(3).getImm() != 0)
825 return 0;
826 SrcReg = DefMI->getOperand(1).getReg();
827 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
828 break;
829
830 case AArch64::ORNXrr:
831 case AArch64::ORNWrr: {
832 // not x -> csinv, represented as orn dst, xzr, src.
833 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
834 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
835 return 0;
836 SrcReg = DefMI->getOperand(2).getReg();
837 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
838 break;
839 }
840
841 case AArch64::SUBSXrr:
842 case AArch64::SUBSWrr:
843 // if NZCV is used, do not fold.
844 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
845 true) == -1)
846 return 0;
847 // fall-through to SUBXrr and SUBWrr.
848 [[fallthrough]];
849 case AArch64::SUBXrr:
850 case AArch64::SUBWrr: {
851 // neg x -> csneg, represented as sub dst, xzr, src.
852 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
853 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
854 return 0;
855 SrcReg = DefMI->getOperand(2).getReg();
856 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
857 break;
858 }
859 default:
860 return 0;
861 }
862 assert(Opc && SrcReg && "Missing parameters");
863
864 if (NewReg)
865 *NewReg = SrcReg;
866 return Opc;
867}
868
869bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
870                                       ArrayRef<MachineOperand> Cond,
871                                       Register DstReg, Register TrueReg,
872 Register FalseReg, int &CondCycles,
873 int &TrueCycles,
874 int &FalseCycles) const {
875 // Check register classes.
876 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
877 const TargetRegisterClass *RC =
878 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
879 if (!RC)
880 return false;
881
882 // Also need to check the dest regclass, in case we're trying to optimize
883 // something like:
884 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
885 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
886 return false;
887
888 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
889 unsigned ExtraCondLat = Cond.size() != 1;
890
891 // GPRs are handled by csel.
892 // FIXME: Fold in x+1, -x, and ~x when applicable.
893 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
894 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
895 // Single-cycle csel, csinc, csinv, and csneg.
896 CondCycles = 1 + ExtraCondLat;
897 TrueCycles = FalseCycles = 1;
898 if (canFoldIntoCSel(MRI, TrueReg))
899 TrueCycles = 0;
900 else if (canFoldIntoCSel(MRI, FalseReg))
901 FalseCycles = 0;
902 return true;
903 }
904
905 // Scalar floating point is handled by fcsel.
906 // FIXME: Form fabs, fmin, and fmax when applicable.
907 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
908 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
909 CondCycles = 5 + ExtraCondLat;
910 TrueCycles = FalseCycles = 2;
911 return true;
912 }
913
914 // Can't do vectors.
915 return false;
916}
917
918void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
919                                    MachineBasicBlock::iterator I,
920                                    const DebugLoc &DL, Register DstReg,
921                                    ArrayRef<MachineOperand> Cond,
922                                    Register TrueReg, Register FalseReg) const {
923 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
924
925 // Parse the condition code, see parseCondBranch() above.
926  AArch64CC::CondCode CC;
927  switch (Cond.size()) {
928 default:
929 llvm_unreachable("Unknown condition opcode in Cond");
930 case 1: // b.cc
931    CC = AArch64CC::CondCode(Cond[0].getImm());
932    break;
933 case 3: { // cbz/cbnz
934 // We must insert a compare against 0.
935 bool Is64Bit;
936 switch (Cond[1].getImm()) {
937 default:
938 llvm_unreachable("Unknown branch opcode in Cond");
939 case AArch64::CBZW:
940 Is64Bit = false;
941 CC = AArch64CC::EQ;
942 break;
943 case AArch64::CBZX:
944 Is64Bit = true;
945 CC = AArch64CC::EQ;
946 break;
947 case AArch64::CBNZW:
948 Is64Bit = false;
949 CC = AArch64CC::NE;
950 break;
951 case AArch64::CBNZX:
952 Is64Bit = true;
953 CC = AArch64CC::NE;
954 break;
955 }
956 Register SrcReg = Cond[2].getReg();
957 if (Is64Bit) {
958 // cmp reg, #0 is actually subs xzr, reg, #0.
959 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
960 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
961 .addReg(SrcReg)
962 .addImm(0)
963 .addImm(0);
964 } else {
965 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
966 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
967 .addReg(SrcReg)
968 .addImm(0)
969 .addImm(0);
970 }
971 break;
972 }
973 case 4: { // tbz/tbnz
974 // We must insert a tst instruction.
975 switch (Cond[1].getImm()) {
976 default:
977 llvm_unreachable("Unknown branch opcode in Cond");
978 case AArch64::TBZW:
979 case AArch64::TBZX:
980 CC = AArch64CC::EQ;
981 break;
982 case AArch64::TBNZW:
983 case AArch64::TBNZX:
984 CC = AArch64CC::NE;
985 break;
986 }
987 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
988 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
989 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
990 .addReg(Cond[2].getReg())
991 .addImm(
992              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
993    else
994 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
995 .addReg(Cond[2].getReg())
996 .addImm(
997              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
998    break;
999 }
1000 case 5: { // cb
1001 // We must insert a cmp, that is a subs
1002 // 0 1 2 3 4
1003 // Cond is { -1, Opcode, CC, Op0, Op1 }
1004
1005 unsigned SubsOpc, SubsDestReg;
1006 bool IsImm = false;
1007 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1008 switch (Cond[1].getImm()) {
1009 default:
1010 llvm_unreachable("Unknown branch opcode in Cond");
1011 case AArch64::CBWPri:
1012 SubsOpc = AArch64::SUBSWri;
1013 SubsDestReg = AArch64::WZR;
1014 IsImm = true;
1015 break;
1016 case AArch64::CBXPri:
1017 SubsOpc = AArch64::SUBSXri;
1018 SubsDestReg = AArch64::XZR;
1019 IsImm = true;
1020 break;
1021 case AArch64::CBWPrr:
1022 SubsOpc = AArch64::SUBSWrr;
1023 SubsDestReg = AArch64::WZR;
1024 IsImm = false;
1025 break;
1026 case AArch64::CBXPrr:
1027 SubsOpc = AArch64::SUBSXrr;
1028 SubsDestReg = AArch64::XZR;
1029 IsImm = false;
1030 break;
1031 }
1032
1033 if (IsImm)
1034 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1035 .addReg(Cond[3].getReg())
1036 .addImm(Cond[4].getImm())
1037 .addImm(0);
1038 else
1039 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1040 .addReg(Cond[3].getReg())
1041 .addReg(Cond[4].getReg());
1042 } break;
1043 case 7: { // cb[b,h]
1044 // We must insert a cmp, that is a subs, but also zero- or sign-extensions
1045 // that have been folded. For the first operand we codegen an explicit
1046 // extension, for the second operand we fold the extension into cmp.
1047 // 0 1 2 3 4 5 6
1048 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
1049
1050 // We need a new register for the now explicitly extended register
1051 Register Reg = Cond[4].getReg();
1053 unsigned ExtOpc;
1054 unsigned ExtBits;
1055 AArch64_AM::ShiftExtendType ExtendType =
1057 switch (ExtendType) {
1058 default:
1059 llvm_unreachable("Unknown shift-extend for CB instruction");
1060 case AArch64_AM::SXTB:
1061 assert(
1062 Cond[1].getImm() == AArch64::CBBAssertExt &&
1063 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1064 ExtOpc = AArch64::SBFMWri;
1065 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1066 break;
1067 case AArch64_AM::SXTH:
1068 assert(
1069 Cond[1].getImm() == AArch64::CBHAssertExt &&
1070 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1071 ExtOpc = AArch64::SBFMWri;
1072 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1073 break;
1074 case AArch64_AM::UXTB:
1075 assert(
1076 Cond[1].getImm() == AArch64::CBBAssertExt &&
1077 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1078 ExtOpc = AArch64::ANDWri;
1079 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1080 break;
1081 case AArch64_AM::UXTH:
1082 assert(
1083 Cond[1].getImm() == AArch64::CBHAssertExt &&
1084 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1085 ExtOpc = AArch64::ANDWri;
1086 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1087 break;
1088 }
1089
1090 // Build the explicit extension of the first operand
1091 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
1092      MachineInstrBuilder MBBI =
1093          BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
1094 if (ExtOpc != AArch64::ANDWri)
1095 MBBI.addImm(0);
1096 MBBI.addImm(ExtBits);
1097 }
1098
1099 // Now, subs with an extended second operand
1101 AArch64_AM::ShiftExtendType ExtendType =
1103 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1104 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1105 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1106 .addReg(Cond[3].getReg())
1107 .addReg(Reg)
1108 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1109 } // If no extension is needed, just a regular subs
1110 else {
1111 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1112 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1113 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1114 .addReg(Cond[3].getReg())
1115 .addReg(Reg);
1116 }
1117
1118 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1119 } break;
1120 }
1121
1122 unsigned Opc = 0;
1123 const TargetRegisterClass *RC = nullptr;
1124 bool TryFold = false;
1125 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1126 RC = &AArch64::GPR64RegClass;
1127 Opc = AArch64::CSELXr;
1128 TryFold = true;
1129 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1130 RC = &AArch64::GPR32RegClass;
1131 Opc = AArch64::CSELWr;
1132 TryFold = true;
1133 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1134 RC = &AArch64::FPR64RegClass;
1135 Opc = AArch64::FCSELDrrr;
1136 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1137 RC = &AArch64::FPR32RegClass;
1138 Opc = AArch64::FCSELSrrr;
1139 }
1140 assert(RC && "Unsupported regclass");
1141
1142 // Try folding simple instructions into the csel.
1143 if (TryFold) {
1144 unsigned NewReg = 0;
1145 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1146 if (FoldedOpc) {
1147      // The folded opcodes csinc, csinv and csneg apply the operation to
1148      // FalseReg, so we need to invert the condition.
1149      CC = AArch64CC::getInvertedCondCode(CC);
1150      TrueReg = FalseReg;
1151 } else
1152 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1153
1154 // Fold the operation. Leave any dead instructions for DCE to clean up.
1155 if (FoldedOpc) {
1156 FalseReg = NewReg;
1157 Opc = FoldedOpc;
1158 // Extend the live range of NewReg.
1159 MRI.clearKillFlags(NewReg);
1160 }
1161 }
1162
1163  // Pull all virtual registers into the appropriate class.
1164 MRI.constrainRegClass(TrueReg, RC);
1165 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1166 assert(
1167 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1168 FalseReg == AArch64::XZR) &&
1169 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1170 if (FalseReg.isVirtual())
1171 MRI.constrainRegClass(FalseReg, RC);
1172
1173 // Insert the csel.
1174 BuildMI(MBB, I, DL, get(Opc), DstReg)
1175 .addReg(TrueReg)
1176 .addReg(FalseReg)
1177 .addImm(CC);
1178}
1179
1180// Return true if Imm can be loaded into a register by a "cheap" sequence of
1181// instructions. For now, "cheap" means at most two instructions.
1182static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1183 if (BitSize == 32)
1184 return true;
1185
1186 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1187 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1188  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1189  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1190
1191 return Is.size() <= 2;
1192}
1193
1194// Check if a COPY instruction is cheap.
1195static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1196 assert(MI.isCopy() && "Expected COPY instruction");
1197 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1198
1199 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1200 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1201 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1202 if (Reg.isVirtual())
1203 return MRI.getRegClass(Reg);
1204 if (Reg.isPhysical())
1205 return RI.getMinimalPhysRegClass(Reg);
1206 return nullptr;
1207 };
1208 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1209 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1210 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1211 return false;
1212
1213 return MI.isAsCheapAsAMove();
1214}
1215
1216// FIXME: this implementation should be micro-architecture dependent, so a
1217// micro-architecture target hook should be introduced here in future.
1218bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1219  if (Subtarget.hasExynosCheapAsMoveHandling()) {
1220 if (isExynosCheapAsMove(MI))
1221 return true;
1222 return MI.isAsCheapAsAMove();
1223 }
1224
1225 switch (MI.getOpcode()) {
1226 default:
1227 return MI.isAsCheapAsAMove();
1228
1229 case TargetOpcode::COPY:
1230 return isCheapCopy(MI, RI);
1231
1232 case AArch64::ADDWrs:
1233 case AArch64::ADDXrs:
1234 case AArch64::SUBWrs:
1235 case AArch64::SUBXrs:
1236 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1237
1238 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1239 // ORRXri, it is as cheap as MOV.
1240 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1241 case AArch64::MOVi32imm:
1242 return isCheapImmediate(MI, 32);
1243 case AArch64::MOVi64imm:
1244 return isCheapImmediate(MI, 64);
1245 }
1246}
1247
1248bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1249 switch (MI.getOpcode()) {
1250 default:
1251 return false;
1252
1253 case AArch64::ADDWrs:
1254 case AArch64::ADDXrs:
1255 case AArch64::ADDSWrs:
1256 case AArch64::ADDSXrs: {
1257 unsigned Imm = MI.getOperand(3).getImm();
1258 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1259 if (ShiftVal == 0)
1260 return true;
1261 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1262 }
1263
1264 case AArch64::ADDWrx:
1265 case AArch64::ADDXrx:
1266 case AArch64::ADDXrx64:
1267 case AArch64::ADDSWrx:
1268 case AArch64::ADDSXrx:
1269 case AArch64::ADDSXrx64: {
1270 unsigned Imm = MI.getOperand(3).getImm();
1271 switch (AArch64_AM::getArithExtendType(Imm)) {
1272 default:
1273 return false;
1274 case AArch64_AM::UXTB:
1275 case AArch64_AM::UXTH:
1276 case AArch64_AM::UXTW:
1277 case AArch64_AM::UXTX:
1278 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1279 }
1280 }
1281
1282 case AArch64::SUBWrs:
1283 case AArch64::SUBSWrs: {
1284 unsigned Imm = MI.getOperand(3).getImm();
1285 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1286 return ShiftVal == 0 ||
1287 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1288 }
1289
1290 case AArch64::SUBXrs:
1291 case AArch64::SUBSXrs: {
1292 unsigned Imm = MI.getOperand(3).getImm();
1293 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1294 return ShiftVal == 0 ||
1295 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1296 }
1297
1298 case AArch64::SUBWrx:
1299 case AArch64::SUBXrx:
1300 case AArch64::SUBXrx64:
1301 case AArch64::SUBSWrx:
1302 case AArch64::SUBSXrx:
1303 case AArch64::SUBSXrx64: {
1304 unsigned Imm = MI.getOperand(3).getImm();
1305 switch (AArch64_AM::getArithExtendType(Imm)) {
1306 default:
1307 return false;
1308 case AArch64_AM::UXTB:
1309 case AArch64_AM::UXTH:
1310 case AArch64_AM::UXTW:
1311 case AArch64_AM::UXTX:
1312 return AArch64_AM::getArithShiftValue(Imm) == 0;
1313 }
1314 }
1315
1316 case AArch64::LDRBBroW:
1317 case AArch64::LDRBBroX:
1318 case AArch64::LDRBroW:
1319 case AArch64::LDRBroX:
1320 case AArch64::LDRDroW:
1321 case AArch64::LDRDroX:
1322 case AArch64::LDRHHroW:
1323 case AArch64::LDRHHroX:
1324 case AArch64::LDRHroW:
1325 case AArch64::LDRHroX:
1326 case AArch64::LDRQroW:
1327 case AArch64::LDRQroX:
1328 case AArch64::LDRSBWroW:
1329 case AArch64::LDRSBWroX:
1330 case AArch64::LDRSBXroW:
1331 case AArch64::LDRSBXroX:
1332 case AArch64::LDRSHWroW:
1333 case AArch64::LDRSHWroX:
1334 case AArch64::LDRSHXroW:
1335 case AArch64::LDRSHXroX:
1336 case AArch64::LDRSWroW:
1337 case AArch64::LDRSWroX:
1338 case AArch64::LDRSroW:
1339 case AArch64::LDRSroX:
1340 case AArch64::LDRWroW:
1341 case AArch64::LDRWroX:
1342 case AArch64::LDRXroW:
1343 case AArch64::LDRXroX:
1344 case AArch64::PRFMroW:
1345 case AArch64::PRFMroX:
1346 case AArch64::STRBBroW:
1347 case AArch64::STRBBroX:
1348 case AArch64::STRBroW:
1349 case AArch64::STRBroX:
1350 case AArch64::STRDroW:
1351 case AArch64::STRDroX:
1352 case AArch64::STRHHroW:
1353 case AArch64::STRHHroX:
1354 case AArch64::STRHroW:
1355 case AArch64::STRHroX:
1356 case AArch64::STRQroW:
1357 case AArch64::STRQroX:
1358 case AArch64::STRSroW:
1359 case AArch64::STRSroX:
1360 case AArch64::STRWroW:
1361 case AArch64::STRWroX:
1362 case AArch64::STRXroW:
1363 case AArch64::STRXroX: {
1364 unsigned IsSigned = MI.getOperand(3).getImm();
1365 return !IsSigned;
1366 }
1367 }
1368}
1369
1370bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1371 unsigned Opc = MI.getOpcode();
1372 switch (Opc) {
1373 default:
1374 return false;
1375 case AArch64::SEH_StackAlloc:
1376 case AArch64::SEH_SaveFPLR:
1377 case AArch64::SEH_SaveFPLR_X:
1378 case AArch64::SEH_SaveReg:
1379 case AArch64::SEH_SaveReg_X:
1380 case AArch64::SEH_SaveRegP:
1381 case AArch64::SEH_SaveRegP_X:
1382 case AArch64::SEH_SaveFReg:
1383 case AArch64::SEH_SaveFReg_X:
1384 case AArch64::SEH_SaveFRegP:
1385 case AArch64::SEH_SaveFRegP_X:
1386 case AArch64::SEH_SetFP:
1387 case AArch64::SEH_AddFP:
1388 case AArch64::SEH_Nop:
1389 case AArch64::SEH_PrologEnd:
1390 case AArch64::SEH_EpilogStart:
1391 case AArch64::SEH_EpilogEnd:
1392 case AArch64::SEH_PACSignLR:
1393 case AArch64::SEH_SaveAnyRegI:
1394 case AArch64::SEH_SaveAnyRegIP:
1395 case AArch64::SEH_SaveAnyRegQP:
1396 case AArch64::SEH_SaveAnyRegQPX:
1397 case AArch64::SEH_AllocZ:
1398 case AArch64::SEH_SaveZReg:
1399 case AArch64::SEH_SavePReg:
1400 return true;
1401 }
1402}
1403
1404bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1405                                             Register &SrcReg, Register &DstReg,
1406 unsigned &SubIdx) const {
1407 switch (MI.getOpcode()) {
1408 default:
1409 return false;
1410 case AArch64::SBFMXri: // aka sxtw
1411 case AArch64::UBFMXri: // aka uxtw
1412 // Check for the 32 -> 64 bit extension case, these instructions can do
1413 // much more.
1414 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1415 return false;
1416 // This is a signed or unsigned 32 -> 64 bit extension.
1417 SrcReg = MI.getOperand(1).getReg();
1418 DstReg = MI.getOperand(0).getReg();
1419 SubIdx = AArch64::sub_32;
1420 return true;
1421 }
1422}
1423
1424bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1425    const MachineInstr &MIa, const MachineInstr &MIb) const {
1426  const TargetRegisterInfo *TRI = &getRegisterInfo();
1427  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1428 int64_t OffsetA = 0, OffsetB = 0;
1429 TypeSize WidthA(0, false), WidthB(0, false);
1430 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1431
1432 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1433 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1434
1435  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1436      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1437    return false;
1438
1439 // Retrieve the base, offset from the base and width. Width
1440 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1441  // the bases are identical, and the offset of a lower memory access +
1442 // the width doesn't overlap the offset of a higher memory access,
1443 // then the memory accesses are different.
1444 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1445 // are assumed to have the same scale (vscale).
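  // For example, an 8-byte store at [x0, #0] and a 4-byte load at [x0, #8]
  // are trivially disjoint: the lower access ends at offset 8, which does not
  // overlap the start of the higher access.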
1446 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1447 WidthA, TRI) &&
1448 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1449 WidthB, TRI)) {
1450 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1451 OffsetAIsScalable == OffsetBIsScalable) {
1452 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1453 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1454 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1455 if (LowWidth.isScalable() == OffsetAIsScalable &&
1456 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1457 return true;
1458 }
1459 }
1460 return false;
1461}
1462
1463bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1464                                            const MachineBasicBlock *MBB,
1465                                            const MachineFunction &MF) const {
1466  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1467    return true;
1468
1469 // Do not move an instruction that can be recognized as a branch target.
1470 if (hasBTISemantics(MI))
1471 return true;
1472
1473 switch (MI.getOpcode()) {
1474 case AArch64::HINT:
1475 // CSDB hints are scheduling barriers.
1476 if (MI.getOperand(0).getImm() == 0x14)
1477 return true;
1478 break;
1479 case AArch64::DSB:
1480 case AArch64::ISB:
1481 // DSB and ISB also are scheduling barriers.
1482 return true;
1483 case AArch64::MSRpstatesvcrImm1:
1484 // SMSTART and SMSTOP are also scheduling barriers.
1485 return true;
1486 default:;
1487 }
1488 if (isSEHInstruction(MI))
1489 return true;
1490 auto Next = std::next(MI.getIterator());
1491 return Next != MBB->end() && Next->isCFIInstruction();
1492}
1493
1494/// analyzeCompare - For a comparison instruction, return the source registers
1495/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1496/// Return true if the comparison instruction can be analyzed.
1497bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1498                                      Register &SrcReg2, int64_t &CmpMask,
1499 int64_t &CmpValue) const {
1500 // The first operand can be a frame index where we'd normally expect a
1501 // register.
1502 // FIXME: Pass subregisters out of analyzeCompare
1503 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1504 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1505 return false;
1506
1507 switch (MI.getOpcode()) {
1508 default:
1509 break;
1510 case AArch64::PTEST_PP:
1511 case AArch64::PTEST_PP_ANY:
1512 case AArch64::PTEST_PP_FIRST:
1513 SrcReg = MI.getOperand(0).getReg();
1514 SrcReg2 = MI.getOperand(1).getReg();
1515 if (MI.getOperand(2).getSubReg())
1516 return false;
1517
1518 // Not sure about the mask and value for now...
1519 CmpMask = ~0;
1520 CmpValue = 0;
1521 return true;
1522 case AArch64::SUBSWrr:
1523 case AArch64::SUBSWrs:
1524 case AArch64::SUBSWrx:
1525 case AArch64::SUBSXrr:
1526 case AArch64::SUBSXrs:
1527 case AArch64::SUBSXrx:
1528 case AArch64::ADDSWrr:
1529 case AArch64::ADDSWrs:
1530 case AArch64::ADDSWrx:
1531 case AArch64::ADDSXrr:
1532 case AArch64::ADDSXrs:
1533 case AArch64::ADDSXrx:
1534 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1535 SrcReg = MI.getOperand(1).getReg();
1536 SrcReg2 = MI.getOperand(2).getReg();
1537
1538 // FIXME: Pass subregisters out of analyzeCompare
1539 if (MI.getOperand(2).getSubReg())
1540 return false;
1541
1542 CmpMask = ~0;
1543 CmpValue = 0;
1544 return true;
1545 case AArch64::SUBSWri:
1546 case AArch64::ADDSWri:
1547 case AArch64::SUBSXri:
1548 case AArch64::ADDSXri:
1549 SrcReg = MI.getOperand(1).getReg();
1550 SrcReg2 = 0;
1551 CmpMask = ~0;
1552 CmpValue = MI.getOperand(2).getImm();
1553 return true;
1554 case AArch64::ANDSWri:
1555 case AArch64::ANDSXri:
1556    // ANDS does not use the same encoding scheme as the other xxxS
1557 // instructions.
1558 SrcReg = MI.getOperand(1).getReg();
1559 SrcReg2 = 0;
1560 CmpMask = ~0;
1561    CmpValue = AArch64_AM::decodeLogicalImmediate(
1562        MI.getOperand(2).getImm(),
1563 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1564 return true;
1565 }
1566
1567 return false;
1568}
1569
1570static bool UpdateOperandRegClass(MachineInstr &Instr) {
1571  MachineBasicBlock *MBB = Instr.getParent();
1572 assert(MBB && "Can't get MachineBasicBlock here");
1573 MachineFunction *MF = MBB->getParent();
1574 assert(MF && "Can't get MachineFunction here");
1575  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1576  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1577  MachineRegisterInfo *MRI = &MF->getRegInfo();
1578
1579 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1580 ++OpIdx) {
1581 MachineOperand &MO = Instr.getOperand(OpIdx);
1582 const TargetRegisterClass *OpRegCstraints =
1583 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1584
1585 // If there's no constraint, there's nothing to do.
1586 if (!OpRegCstraints)
1587 continue;
1588 // If the operand is a frame index, there's nothing to do here.
1589 // A frame index operand will resolve correctly during PEI.
1590 if (MO.isFI())
1591 continue;
1592
1593 assert(MO.isReg() &&
1594 "Operand has register constraints without being a register!");
1595
1596 Register Reg = MO.getReg();
1597 if (Reg.isPhysical()) {
1598 if (!OpRegCstraints->contains(Reg))
1599 return false;
1600 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1601 !MRI->constrainRegClass(Reg, OpRegCstraints))
1602 return false;
1603 }
1604
1605 return true;
1606}
1607
1608/// Return the opcode that does not set flags when possible - otherwise
1609/// return the original opcode. The caller is responsible to do the actual
1610/// substitution and legality checking.
1611unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1612  // Don't convert all compare instructions, because for some the zero register
1613 // encoding becomes the sp register.
1614 bool MIDefinesZeroReg = false;
1615 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1616 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1617 MIDefinesZeroReg = true;
1618
1619 switch (MI.getOpcode()) {
1620 default:
1621 return MI.getOpcode();
1622 case AArch64::ADDSWrr:
1623 return AArch64::ADDWrr;
1624 case AArch64::ADDSWri:
1625 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1626 case AArch64::ADDSWrs:
1627 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1628 case AArch64::ADDSWrx:
1629 return AArch64::ADDWrx;
1630 case AArch64::ADDSXrr:
1631 return AArch64::ADDXrr;
1632 case AArch64::ADDSXri:
1633 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1634 case AArch64::ADDSXrs:
1635 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1636 case AArch64::ADDSXrx:
1637 return AArch64::ADDXrx;
1638 case AArch64::SUBSWrr:
1639 return AArch64::SUBWrr;
1640 case AArch64::SUBSWri:
1641 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1642 case AArch64::SUBSWrs:
1643 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1644 case AArch64::SUBSWrx:
1645 return AArch64::SUBWrx;
1646 case AArch64::SUBSXrr:
1647 return AArch64::SUBXrr;
1648 case AArch64::SUBSXri:
1649 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1650 case AArch64::SUBSXrs:
1651 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1652 case AArch64::SUBSXrx:
1653 return AArch64::SUBXrx;
1654 }
1655}
1656
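// The access kinds are disjoint bit flags, so AK_All covers both and
// areCFlagsAccessedBetweenInstrs() can test each kind with a bitwise AND.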
1657enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1658
1659/// True when condition flags are accessed (either by writing or reading)
1660/// on the instruction trace starting at From and ending at To.
1661///
1662/// Note: If From and To are from different blocks it's assumed CC are accessed
1663/// on the path.
1664static bool areCFlagsAccessedBetweenInstrs(
1665    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1666    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1667 // Early exit if To is at the beginning of the BB.
1668 if (To == To->getParent()->begin())
1669 return true;
1670
1671 // Check whether the instructions are in the same basic block
1672 // If not, assume the condition flags might get modified somewhere.
1673 if (To->getParent() != From->getParent())
1674 return true;
1675
1676 // From must be above To.
1677 assert(std::any_of(
1678 ++To.getReverse(), To->getParent()->rend(),
1679 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1680
1681 // We iterate backward starting at \p To until we hit \p From.
1682 for (const MachineInstr &Instr :
1683       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1684    if (((AccessToCheck & AK_Write) &&
1685 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1686 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1687 return true;
1688 }
1689 return false;
1690}
1691
1692std::optional<unsigned>
1693AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1694 MachineInstr *Pred,
1695 const MachineRegisterInfo *MRI) const {
1696 unsigned MaskOpcode = Mask->getOpcode();
1697 unsigned PredOpcode = Pred->getOpcode();
1698 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1699 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1700
1701 if (PredIsWhileLike) {
1702 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1703    // instruction and the condition is "any" since WHILEcc does an implicit
1704 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1705 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1706 return PredOpcode;
1707
1708 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1709 // redundant since WHILE performs an implicit PTEST with an all active
1710 // mask.
1711 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1712 getElementSizeForOpcode(MaskOpcode) ==
1713 getElementSizeForOpcode(PredOpcode))
1714 return PredOpcode;
1715
1716 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1717 // WHILEcc performs an implicit PTEST with an all active mask, setting
1718 // the N flag as the PTEST_FIRST would.
1719 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1720 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1721 return PredOpcode;
1722
1723 return {};
1724 }
1725
1726 if (PredIsPTestLike) {
1727 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1728 // instruction that sets the flags as PTEST would and the condition is
1729 // "any" since PG is always a subset of the governing predicate of the
1730 // ptest-like instruction.
1731 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1732 return PredOpcode;
1733
1734 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1735
1736 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1737 // to look through a copy and try again. This is because some instructions
1738 // take a predicate whose register class is a subset of its result class.
1739 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1740 PTestLikeMask->getOperand(1).getReg().isVirtual())
1741 PTestLikeMask =
1742 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1743
1744 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1745    // element size matches and either the PTEST_LIKE instruction uses
1746 // the same all active mask or the condition is "any".
1747 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1748 getElementSizeForOpcode(MaskOpcode) ==
1749 getElementSizeForOpcode(PredOpcode)) {
1750 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1751 return PredOpcode;
1752 }
1753
1754 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1755 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1756 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1757 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1758 // performed by the compare could consider fewer lanes for these element
1759 // sizes.
1760 //
1761 // For example, consider
1762 //
1763 // ptrue p0.b ; P0=1111-1111-1111-1111
1764 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1765 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1766 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1767 // ; ^ last active
1768 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1769 // ; ^ last active
1770 //
1771 // where the compare generates a canonical all active 32-bit predicate
1772 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1773 // active flag, whereas the PTEST instruction with the same mask doesn't.
1774 // For PTEST_ANY this doesn't apply as the flags in this case would be
1775 // identical regardless of element size.
1776 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1777 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1778 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1779 return PredOpcode;
1780
1781 return {};
1782 }
1783
1784 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1785 // opcode so the PTEST becomes redundant.
1786 switch (PredOpcode) {
1787 case AArch64::AND_PPzPP:
1788 case AArch64::BIC_PPzPP:
1789 case AArch64::EOR_PPzPP:
1790 case AArch64::NAND_PPzPP:
1791 case AArch64::NOR_PPzPP:
1792 case AArch64::ORN_PPzPP:
1793 case AArch64::ORR_PPzPP:
1794 case AArch64::BRKA_PPzP:
1795 case AArch64::BRKPA_PPzPP:
1796 case AArch64::BRKB_PPzP:
1797 case AArch64::BRKPB_PPzPP:
1798 case AArch64::RDFFR_PPz: {
1799    // Check to see if our mask is the same. If not, the resulting flag bits
1800    // may be different and we cannot remove the ptest.
1801 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1802 if (Mask != PredMask)
1803 return {};
1804 break;
1805 }
1806 case AArch64::BRKN_PPzP: {
1807 // BRKN uses an all active implicit mask to set flags unlike the other
1808 // flag-setting instructions.
1809 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1810 if ((MaskOpcode != AArch64::PTRUE_B) ||
1811 (Mask->getOperand(1).getImm() != 31))
1812 return {};
1813 break;
1814 }
1815 case AArch64::PTRUE_B:
1816 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1817 break;
1818 default:
1819 // Bail out if we don't recognize the input
1820 return {};
1821 }
1822
1823 return convertToFlagSettingOpc(PredOpcode);
1824}
1825
1826/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1827/// operation which could set the flags in an identical manner
1828bool AArch64InstrInfo::optimizePTestInstr(
1829 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1830 const MachineRegisterInfo *MRI) const {
1831 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1832 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1833
1834 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1835 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1836 // before the branch to extract each subregister.
1837 auto Op = Pred->getOperand(1);
1838 if (Op.isReg() && Op.getReg().isVirtual() &&
1839 Op.getSubReg() == AArch64::psub0)
1840 Pred = MRI->getUniqueVRegDef(Op.getReg());
1841 }
1842
1843 unsigned PredOpcode = Pred->getOpcode();
1844 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1845 if (!NewOp)
1846 return false;
1847
1848 const TargetRegisterInfo *TRI = &getRegisterInfo();
1849
1850 // If another instruction between Pred and PTest accesses flags, don't remove
1851 // the ptest or update the earlier instruction to modify them.
1852 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1853 return false;
1854
1855 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1856 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1857 // operand to be replaced with an equivalent instruction that also sets the
1858 // flags.
1859 PTest->eraseFromParent();
1860 if (*NewOp != PredOpcode) {
1861 Pred->setDesc(get(*NewOp));
1862 bool succeeded = UpdateOperandRegClass(*Pred);
1863 (void)succeeded;
1864 assert(succeeded && "Operands have incompatible register classes!");
1865 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1866 }
1867
1868 // Ensure that the flags def is live.
1869 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1870 unsigned i = 0, e = Pred->getNumOperands();
1871 for (; i != e; ++i) {
1872 MachineOperand &MO = Pred->getOperand(i);
1873 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1874 MO.setIsDead(false);
1875 break;
1876 }
1877 }
1878 }
1879 return true;
1880}
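
// For example (illustrative sketch, not taken from the original comments):
//   ptrue p0.b
//   and   p1.b, p0/z, p2.b, p3.b
//   ptest p0, p1.b
//   b.ne  <target>
// Here the PTEST reads exactly the flags the AND would produce if it set them,
// so the sequence can be rewritten by the code above to
//   ptrue p0.b
//   ands  p1.b, p0/z, p2.b, p3.b
//   b.ne  <target>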
1881
1882/// Try to optimize a compare instruction. A compare instruction is an
1883/// instruction which produces AArch64::NZCV. It is truly a compare instruction
1884/// only when there are no uses of its destination register.
1885///
1886/// The following steps are tried in order:
1887/// 1. Convert CmpInstr into an unconditional version.
1888/// 2. Remove CmpInstr if there is an earlier instruction producing a needed
1889///    condition code, or an instruction which can be converted into such an
1890///    instruction.
1891///
1892/// Only comparisons with zero are supported.
1893bool AArch64InstrInfo::optimizeCompareInstr(
1894    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1895 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1896 assert(CmpInstr.getParent());
1897 assert(MRI);
1898
1899 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1900 int DeadNZCVIdx =
1901 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1902 if (DeadNZCVIdx != -1) {
1903 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1904 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1905 CmpInstr.eraseFromParent();
1906 return true;
1907 }
1908 unsigned Opc = CmpInstr.getOpcode();
1909 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1910 if (NewOpc == Opc)
1911 return false;
1912 const MCInstrDesc &MCID = get(NewOpc);
1913 CmpInstr.setDesc(MCID);
1914 CmpInstr.removeOperand(DeadNZCVIdx);
1915 bool succeeded = UpdateOperandRegClass(CmpInstr);
1916 (void)succeeded;
1917 assert(succeeded && "Some operands reg class are incompatible!");
1918 return true;
1919 }
1920
1921 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1922 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1923 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1924 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1925
1926 if (SrcReg2 != 0)
1927 return false;
1928
1929  // CmpInstr is a compare instruction if its destination register is not used.
1930 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1931 return false;
1932
1933 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1934 return true;
1935 return (CmpValue == 0 || CmpValue == 1) &&
1936 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1937}
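
// For example (illustrative): a 'subs w8, w9, w10' whose NZCV result is never
// read is rewritten above to 'sub w8, w9, w10'; conversely, a 'cmp w8, #0'
// that follows 'add w8, w9, w10' can be removed by converting the add into an
// 'adds' (see substituteCmpToZero and removeCmpToZeroOrOne below).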
1938
1939/// Get the opcode of the S (flag-setting) version of Instr.
1940/// If Instr already is the S version, its opcode is returned.
1941/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1942/// version or we are not interested in it.
1943static unsigned sForm(MachineInstr &Instr) {
1944 switch (Instr.getOpcode()) {
1945 default:
1946 return AArch64::INSTRUCTION_LIST_END;
1947
1948 case AArch64::ADDSWrr:
1949 case AArch64::ADDSWri:
1950 case AArch64::ADDSXrr:
1951 case AArch64::ADDSXri:
1952 case AArch64::ADDSWrx:
1953 case AArch64::ADDSXrx:
1954 case AArch64::SUBSWrr:
1955 case AArch64::SUBSWri:
1956 case AArch64::SUBSWrx:
1957 case AArch64::SUBSXrr:
1958 case AArch64::SUBSXri:
1959 case AArch64::SUBSXrx:
1960 case AArch64::ANDSWri:
1961 case AArch64::ANDSWrr:
1962 case AArch64::ANDSWrs:
1963 case AArch64::ANDSXri:
1964 case AArch64::ANDSXrr:
1965 case AArch64::ANDSXrs:
1966 case AArch64::BICSWrr:
1967 case AArch64::BICSXrr:
1968 case AArch64::BICSWrs:
1969 case AArch64::BICSXrs:
1970 return Instr.getOpcode();
1971
1972 case AArch64::ADDWrr:
1973 return AArch64::ADDSWrr;
1974 case AArch64::ADDWri:
1975 return AArch64::ADDSWri;
1976 case AArch64::ADDXrr:
1977 return AArch64::ADDSXrr;
1978 case AArch64::ADDXri:
1979 return AArch64::ADDSXri;
1980 case AArch64::ADDWrx:
1981 return AArch64::ADDSWrx;
1982 case AArch64::ADDXrx:
1983 return AArch64::ADDSXrx;
1984 case AArch64::ADCWr:
1985 return AArch64::ADCSWr;
1986 case AArch64::ADCXr:
1987 return AArch64::ADCSXr;
1988 case AArch64::SUBWrr:
1989 return AArch64::SUBSWrr;
1990 case AArch64::SUBWri:
1991 return AArch64::SUBSWri;
1992 case AArch64::SUBXrr:
1993 return AArch64::SUBSXrr;
1994 case AArch64::SUBXri:
1995 return AArch64::SUBSXri;
1996 case AArch64::SUBWrx:
1997 return AArch64::SUBSWrx;
1998 case AArch64::SUBXrx:
1999 return AArch64::SUBSXrx;
2000 case AArch64::SBCWr:
2001 return AArch64::SBCSWr;
2002 case AArch64::SBCXr:
2003 return AArch64::SBCSXr;
2004 case AArch64::ANDWri:
2005 return AArch64::ANDSWri;
2006 case AArch64::ANDXri:
2007 return AArch64::ANDSXri;
2008 case AArch64::ANDWrr:
2009 return AArch64::ANDSWrr;
2010 case AArch64::ANDWrs:
2011 return AArch64::ANDSWrs;
2012 case AArch64::ANDXrr:
2013 return AArch64::ANDSXrr;
2014 case AArch64::ANDXrs:
2015 return AArch64::ANDSXrs;
2016 case AArch64::BICWrr:
2017 return AArch64::BICSWrr;
2018 case AArch64::BICXrr:
2019 return AArch64::BICSXrr;
2020 case AArch64::BICWrs:
2021 return AArch64::BICSWrs;
2022 case AArch64::BICXrs:
2023 return AArch64::BICSXrs;
2024 }
2025}
2026
2027/// Check if AArch64::NZCV should be alive in successors of MBB.
2028static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
2029  for (auto *BB : MBB->successors())
2030 if (BB->isLiveIn(AArch64::NZCV))
2031 return true;
2032 return false;
2033}
2034
2035/// \returns The condition code operand index for \p Instr if it is a branch
2036/// or select and -1 otherwise.
2037static int
2038findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
2039  switch (Instr.getOpcode()) {
2040 default:
2041 return -1;
2042
2043 case AArch64::Bcc: {
2044 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2045 assert(Idx >= 2);
2046 return Idx - 2;
2047 }
2048
2049 case AArch64::CSINVWr:
2050 case AArch64::CSINVXr:
2051 case AArch64::CSINCWr:
2052 case AArch64::CSINCXr:
2053 case AArch64::CSELWr:
2054 case AArch64::CSELXr:
2055 case AArch64::CSNEGWr:
2056 case AArch64::CSNEGXr:
2057 case AArch64::FCSELSrrr:
2058 case AArch64::FCSELDrrr: {
2059 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2060 assert(Idx >= 1);
2061 return Idx - 1;
2062 }
2063 }
2064}
2065
2066/// Find a condition code used by the instruction.
2067/// Returns AArch64CC::Invalid if either the instruction does not use condition
2068/// codes or we don't optimize CmpInstr in the presence of such instructions.
2069static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
2070  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
2071  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2072 Instr.getOperand(CCIdx).getImm())
2073                    : AArch64CC::Invalid;
2074}
2075
2078 UsedNZCV UsedFlags;
2079 switch (CC) {
2080 default:
2081 break;
2082
2083 case AArch64CC::EQ: // Z set
2084 case AArch64CC::NE: // Z clear
2085 UsedFlags.Z = true;
2086 break;
2087
2088 case AArch64CC::HI: // Z clear and C set
2089 case AArch64CC::LS: // Z set or C clear
2090 UsedFlags.Z = true;
2091 [[fallthrough]];
2092 case AArch64CC::HS: // C set
2093 case AArch64CC::LO: // C clear
2094 UsedFlags.C = true;
2095 break;
2096
2097 case AArch64CC::MI: // N set
2098 case AArch64CC::PL: // N clear
2099 UsedFlags.N = true;
2100 break;
2101
2102 case AArch64CC::VS: // V set
2103 case AArch64CC::VC: // V clear
2104 UsedFlags.V = true;
2105 break;
2106
2107 case AArch64CC::GT: // Z clear, N and V the same
2108 case AArch64CC::LE: // Z set, N and V differ
2109 UsedFlags.Z = true;
2110 [[fallthrough]];
2111 case AArch64CC::GE: // N and V the same
2112 case AArch64CC::LT: // N and V differ
2113 UsedFlags.N = true;
2114 UsedFlags.V = true;
2115 break;
2116 }
2117 return UsedFlags;
2118}
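
// For example (following the table above): getUsedNZCV(AArch64CC::HI) reports
// both Z and C as used, since HI means "C set and Z clear", while
// getUsedNZCV(AArch64CC::EQ) reports only Z.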
2119
2120/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
2121/// flags are not alive in the successors of the basic block containing both
2122/// \p CmpInstr and \p MI; \returns std::nullopt otherwise.
2123///
2124/// Collect instructions using those flags in \p CCUseInstrs if provided.
2125std::optional<UsedNZCV>
2127 const TargetRegisterInfo &TRI,
2128 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2129 MachineBasicBlock *CmpParent = CmpInstr.getParent();
2130 if (MI.getParent() != CmpParent)
2131 return std::nullopt;
2132
2133 if (areCFlagsAliveInSuccessors(CmpParent))
2134 return std::nullopt;
2135
2136 UsedNZCV NZCVUsedAfterCmp;
2138 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2139 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2141 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2142 return std::nullopt;
2143 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2144 if (CCUseInstrs)
2145 CCUseInstrs->push_back(&Instr);
2146 }
2147 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2148 break;
2149 }
2150 return NZCVUsedAfterCmp;
2151}
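
// For example (illustrative): if the only NZCV reader after CmpInstr in the
// same block is a 'b.eq', the returned UsedNZCV has only Z set; if a 'b.vs'
// follows instead, only V is reported as used.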
2152
2153static bool isADDSRegImm(unsigned Opcode) {
2154 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2155}
2156
2157static bool isSUBSRegImm(unsigned Opcode) {
2158 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2159}
2160
2162 unsigned Opc = sForm(MI);
2163 switch (Opc) {
2164 case AArch64::ANDSWri:
2165 case AArch64::ANDSWrr:
2166 case AArch64::ANDSWrs:
2167 case AArch64::ANDSXri:
2168 case AArch64::ANDSXrr:
2169 case AArch64::ANDSXrs:
2170 case AArch64::BICSWrr:
2171 case AArch64::BICSXrr:
2172 case AArch64::BICSWrs:
2173 case AArch64::BICSXrs:
2174 return true;
2175 default:
2176 return false;
2177 }
2178}
2179
2180/// Check if CmpInstr can be substituted by MI.
2181///
2182/// CmpInstr can be substituted:
2183/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2184/// - and, MI and CmpInstr are from the same MachineBB
2185/// - and, condition flags are not alive in successors of the CmpInstr parent
2186/// - and, if MI opcode is the S form, there must be no defs of flags between
2187///        MI and CmpInstr,
2188///        or, if MI opcode is not the S form, there must be neither defs nor
2189///        uses of flags between MI and CmpInstr.
2190/// - and, the C flag is not used after CmpInstr,
2191///        and, if the V flag is used, MI produces a poison value when signed
2192///        overflow occurs (or, as for the ANDS/BICS forms, always clears V).
2194 const TargetRegisterInfo &TRI) {
2195  // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
2196  // subtraction that may or may not set flags.
2197 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2198
2199 const unsigned CmpOpcode = CmpInstr.getOpcode();
2200 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2201 return false;
2202
2203 assert((CmpInstr.getOperand(2).isImm() &&
2204 CmpInstr.getOperand(2).getImm() == 0) &&
2205 "Caller guarantees that CmpInstr compares with constant 0");
2206
2207 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2208 if (!NZVCUsed || NZVCUsed->C)
2209 return false;
2210
2211 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
2212 // '%vreg = add ...' or '%vreg = sub ...'.
2213 // Condition flag V is used to indicate signed overflow.
2214 // 1) MI and CmpInstr set N and V to the same value.
2215 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
2216 // signed overflow occurs, so CmpInstr could still be simplified away.
2217 // Note that Ands and Bics instructions always clear the V flag.
2218 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2219 return false;
2220
2221 AccessKind AccessToCheck = AK_Write;
2222 if (sForm(MI) != MI.getOpcode())
2223 AccessToCheck = AK_All;
2224 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2225}
2226
2227/// Substitute an instruction comparing to zero with another instruction
2228/// which produces needed condition flags.
2229///
2230/// Return true on success.
2231bool AArch64InstrInfo::substituteCmpToZero(
2232 MachineInstr &CmpInstr, unsigned SrcReg,
2233 const MachineRegisterInfo &MRI) const {
2234 // Get the unique definition of SrcReg.
2235 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2236 if (!MI)
2237 return false;
2238
2239 const TargetRegisterInfo &TRI = getRegisterInfo();
2240
2241 unsigned NewOpc = sForm(*MI);
2242 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2243 return false;
2244
2245 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2246 return false;
2247
2248 // Update the instruction to set NZCV.
2249 MI->setDesc(get(NewOpc));
2250 CmpInstr.eraseFromParent();
2252 (void)succeeded;
2253 assert(succeeded && "Some operands reg class are incompatible!");
2254 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2255 return true;
2256}
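
// For example (illustrative):
//   add  w8, w9, w10
//   subs wzr, w8, #0      ; cmp w8, #0
//   b.eq <target>
// becomes
//   adds w8, w9, w10
//   b.eq <target>
// since the adds sets Z exactly as the removed compare with zero would.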
2257
2258/// \returns True if \p CmpInstr can be removed.
2259///
2260/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2261/// codes used in \p CCUseInstrs must be inverted.
2263 int CmpValue, const TargetRegisterInfo &TRI,
2265 bool &IsInvertCC) {
2266 assert((CmpValue == 0 || CmpValue == 1) &&
2267 "Only comparisons to 0 or 1 considered for removal!");
2268
2269 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2270 unsigned MIOpc = MI.getOpcode();
2271 if (MIOpc == AArch64::CSINCWr) {
2272 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2273 MI.getOperand(2).getReg() != AArch64::WZR)
2274 return false;
2275 } else if (MIOpc == AArch64::CSINCXr) {
2276 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2277 MI.getOperand(2).getReg() != AArch64::XZR)
2278 return false;
2279 } else {
2280 return false;
2281 }
2283 if (MICC == AArch64CC::Invalid)
2284 return false;
2285
2286 // NZCV needs to be defined
2287 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2288 return false;
2289
2290 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2291 const unsigned CmpOpcode = CmpInstr.getOpcode();
2292 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2293 if (CmpValue && !IsSubsRegImm)
2294 return false;
2295 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2296 return false;
2297
2298 // MI conditions allowed: eq, ne, mi, pl
2299 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2300 if (MIUsedNZCV.C || MIUsedNZCV.V)
2301 return false;
2302
2303 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2304 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2305  // Condition flags are not used in CmpInstr basic block successors, and only
2306  // the Z or N flags are allowed to be used after CmpInstr within its basic block.
2307 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2308 return false;
2309 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2310 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2311 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2312 return false;
2313  // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
2314 if (MIUsedNZCV.N && !CmpValue)
2315 return false;
2316
2317 // There must be no defs of flags between MI and CmpInstr
2318 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2319 return false;
2320
2321 // Condition code is inverted in the following cases:
2322 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2323 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2324 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2325 (!CmpValue && MICC == AArch64CC::NE);
2326 return true;
2327}
2328
2329/// Remove comparison in csinc-cmp sequence
2330///
2331/// Examples:
2332/// 1. \code
2333/// csinc w9, wzr, wzr, ne
2334/// cmp w9, #0
2335/// b.eq
2336/// \endcode
2337/// to
2338/// \code
2339/// csinc w9, wzr, wzr, ne
2340/// b.ne
2341/// \endcode
2342///
2343/// 2. \code
2344/// csinc x2, xzr, xzr, mi
2345/// cmp x2, #1
2346/// b.pl
2347/// \endcode
2348/// to
2349/// \code
2350/// csinc x2, xzr, xzr, mi
2351/// b.pl
2352/// \endcode
2353///
2354/// \param CmpInstr comparison instruction
2355/// \return True when comparison removed
2356bool AArch64InstrInfo::removeCmpToZeroOrOne(
2357 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2358 const MachineRegisterInfo &MRI) const {
2359 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2360 if (!MI)
2361 return false;
2362 const TargetRegisterInfo &TRI = getRegisterInfo();
2363 SmallVector<MachineInstr *, 4> CCUseInstrs;
2364 bool IsInvertCC = false;
2365 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2366 IsInvertCC))
2367 return false;
2368 // Make transformation
2369 CmpInstr.eraseFromParent();
2370 if (IsInvertCC) {
2371 // Invert condition codes in CmpInstr CC users
2372 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2373 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2374 assert(Idx >= 0 && "Unexpected instruction using CC.");
2375 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2377 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2378 CCOperand.setImm(CCUse);
2379 }
2380 }
2381 return true;
2382}
2383
2384bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2385 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2386 MI.getOpcode() != AArch64::CATCHRET)
2387 return false;
2388
2389 MachineBasicBlock &MBB = *MI.getParent();
2390 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2391 auto TRI = Subtarget.getRegisterInfo();
2392 DebugLoc DL = MI.getDebugLoc();
2393
2394 if (MI.getOpcode() == AArch64::CATCHRET) {
2395 // Skip to the first instruction before the epilog.
2396 const TargetInstrInfo *TII =
2398 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2400 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2401 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2402 FirstEpilogSEH != MBB.begin())
2403 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2404 if (FirstEpilogSEH != MBB.begin())
2405 FirstEpilogSEH = std::next(FirstEpilogSEH);
2406 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2407 .addReg(AArch64::X0, RegState::Define)
2408 .addMBB(TargetMBB);
2409 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2410 .addReg(AArch64::X0, RegState::Define)
2411 .addReg(AArch64::X0)
2412 .addMBB(TargetMBB)
2413 .addImm(0);
2414 TargetMBB->setMachineBlockAddressTaken();
2415 return true;
2416 }
2417
2418 Register Reg = MI.getOperand(0).getReg();
2420 if (M.getStackProtectorGuard() == "sysreg") {
2421 const AArch64SysReg::SysReg *SrcReg =
2422 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2423 if (!SrcReg)
2424 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2425
2426 // mrs xN, sysreg
2427 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2429 .addImm(SrcReg->Encoding);
2430 int Offset = M.getStackProtectorGuardOffset();
2431 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2432 // ldr xN, [xN, #offset]
2433 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2434 .addDef(Reg)
2436 .addImm(Offset / 8);
2437 } else if (Offset >= -256 && Offset <= 255) {
2438 // ldur xN, [xN, #offset]
2439 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2440 .addDef(Reg)
2442 .addImm(Offset);
2443 } else if (Offset >= -4095 && Offset <= 4095) {
2444 if (Offset > 0) {
2445 // add xN, xN, #offset
2446 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2447 .addDef(Reg)
2449 .addImm(Offset)
2450 .addImm(0);
2451 } else {
2452 // sub xN, xN, #offset
2453 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2454 .addDef(Reg)
2456 .addImm(-Offset)
2457 .addImm(0);
2458 }
2459 // ldr xN, [xN]
2460 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2461 .addDef(Reg)
2463 .addImm(0);
2464 } else {
2465      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2466      // than 32760 (the maximum offset encodable by the scaled LDRXui above).
2467 // It might be nice to use AArch64::MOVi32imm here, which would get
2468 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2469 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2470 // AArch64FrameLowering might help us find such a scratch register
2471 // though. If we failed to find a scratch register, we could emit a
2472 // stream of add instructions to build up the immediate. Or, we could try
2473      // to insert an AArch64::MOVi32imm before register allocation so that we
2474 // didn't need to scavenge for a scratch register.
2475 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2476 }
2477 MBB.erase(MI);
2478 return true;
2479 }
2480
2481 const GlobalValue *GV =
2482 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2483 const TargetMachine &TM = MBB.getParent()->getTarget();
2484 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2485 const unsigned char MO_NC = AArch64II::MO_NC;
2486
2487 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2488 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2489 .addGlobalAddress(GV, 0, OpFlags);
2490 if (Subtarget.isTargetILP32()) {
2491 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2492 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2493 .addDef(Reg32, RegState::Dead)
2495 .addImm(0)
2496 .addMemOperand(*MI.memoperands_begin())
2498 } else {
2499 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2501 .addImm(0)
2502 .addMemOperand(*MI.memoperands_begin());
2503 }
2504 } else if (TM.getCodeModel() == CodeModel::Large) {
2505 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2506 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2507 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2508 .addImm(0);
2509 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2511 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2512 .addImm(16);
2513 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2515 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2516 .addImm(32);
2517 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2520 .addImm(48);
2521 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2523 .addImm(0)
2524 .addMemOperand(*MI.memoperands_begin());
2525 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2526 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2527 .addGlobalAddress(GV, 0, OpFlags);
2528 } else {
2529 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2530 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2531 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2532 if (Subtarget.isTargetILP32()) {
2533 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2534 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2535 .addDef(Reg32, RegState::Dead)
2537 .addGlobalAddress(GV, 0, LoFlags)
2538 .addMemOperand(*MI.memoperands_begin())
2540 } else {
2541 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2543 .addGlobalAddress(GV, 0, LoFlags)
2544 .addMemOperand(*MI.memoperands_begin());
2545 }
2546 }
2547  // To match MSVC: unlike x86_64, which uses an xor instruction to mix the
2548  // cookie, we use a sub instruction on aarch64 so that the existing
2549  // inlining logic stays intact.
2550 if (Subtarget.getTargetTriple().isOSMSVCRT() &&
2551 !Subtarget.getTargetLowering()
2552 ->getTargetMachine()
2553 .Options.EnableGlobalISel) {
2554 BuildMI(MBB, MI, DL, get(AArch64::SUBXrx64), Reg)
2555 .addReg(AArch64::SP)
2558 }
2559
2560 MBB.erase(MI);
2561
2562 return true;
2563}
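
// For example (illustrative): with a "sysreg" stack protector guard using
// sp_el0 and a small 8-byte-aligned offset, LOAD_STACK_GUARD expands to
// roughly
//   mrs xN, SP_EL0
//   ldr xN, [xN, #offset]
// while the default global-variable guard expands to an ADRP/LDR or GOT-based
// sequence depending on how the global reference is classified.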
2564
2565// Return true if this instruction simply sets its single destination register
2566// to zero. This is equivalent to a register rename of the zero-register.
2568 switch (MI.getOpcode()) {
2569 default:
2570 break;
2571 case AArch64::MOVZWi:
2572 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2573 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2574 assert(MI.getDesc().getNumOperands() == 3 &&
2575 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2576 return true;
2577 }
2578 break;
2579 case AArch64::ANDWri: // and Rd, Rzr, #imm
2580 return MI.getOperand(1).getReg() == AArch64::WZR;
2581 case AArch64::ANDXri:
2582 return MI.getOperand(1).getReg() == AArch64::XZR;
2583 case TargetOpcode::COPY:
2584 return MI.getOperand(1).getReg() == AArch64::WZR;
2585 }
2586 return false;
2587}
2588
2589// Return true if this instruction simply renames a general register without
2590// modifying bits.
2592 switch (MI.getOpcode()) {
2593 default:
2594 break;
2595 case TargetOpcode::COPY: {
2596    // GPR32 copies will be lowered to ORRXrs
2597 Register DstReg = MI.getOperand(0).getReg();
2598 return (AArch64::GPR32RegClass.contains(DstReg) ||
2599 AArch64::GPR64RegClass.contains(DstReg));
2600 }
2601 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2602 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2603 assert(MI.getDesc().getNumOperands() == 4 &&
2604 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2605 return true;
2606 }
2607 break;
2608 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2609 if (MI.getOperand(2).getImm() == 0) {
2610 assert(MI.getDesc().getNumOperands() == 4 &&
2611 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2612 return true;
2613 }
2614 break;
2615 }
2616 return false;
2617}
2618
2619// Return true if this instruction simply renames a floating-point/vector
2620// register without modifying bits.
2622 switch (MI.getOpcode()) {
2623 default:
2624 break;
2625 case TargetOpcode::COPY: {
2626 Register DstReg = MI.getOperand(0).getReg();
2627 return AArch64::FPR128RegClass.contains(DstReg);
2628 }
2629 case AArch64::ORRv16i8:
2630 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2631 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2632 "invalid ORRv16i8 operands");
2633 return true;
2634 }
2635 break;
2636 }
2637 return false;
2638}
2639
2640static bool isFrameLoadOpcode(int Opcode) {
2641 switch (Opcode) {
2642 default:
2643 return false;
2644 case AArch64::LDRWui:
2645 case AArch64::LDRXui:
2646 case AArch64::LDRBui:
2647 case AArch64::LDRHui:
2648 case AArch64::LDRSui:
2649 case AArch64::LDRDui:
2650 case AArch64::LDRQui:
2651 case AArch64::LDR_PXI:
2652 return true;
2653 }
2654}
2655
2657 int &FrameIndex) const {
2658 if (!isFrameLoadOpcode(MI.getOpcode()))
2659 return Register();
2660
2661 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2662 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2663 FrameIndex = MI.getOperand(1).getIndex();
2664 return MI.getOperand(0).getReg();
2665 }
2666 return Register();
2667}
2668
2669static bool isFrameStoreOpcode(int Opcode) {
2670 switch (Opcode) {
2671 default:
2672 return false;
2673 case AArch64::STRWui:
2674 case AArch64::STRXui:
2675 case AArch64::STRBui:
2676 case AArch64::STRHui:
2677 case AArch64::STRSui:
2678 case AArch64::STRDui:
2679 case AArch64::STRQui:
2680 case AArch64::STR_PXI:
2681 return true;
2682 }
2683}
2684
2686 int &FrameIndex) const {
2687 if (!isFrameStoreOpcode(MI.getOpcode()))
2688 return Register();
2689
2690 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2691 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2692 FrameIndex = MI.getOperand(1).getIndex();
2693 return MI.getOperand(0).getReg();
2694 }
2695 return Register();
2696}
2697
2699 int &FrameIndex) const {
2700 if (!isFrameStoreOpcode(MI.getOpcode()))
2701 return Register();
2702
2703 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2704 return Reg;
2705
2707 if (hasStoreToStackSlot(MI, Accesses)) {
2708 if (Accesses.size() > 1)
2709 return Register();
2710
2711 FrameIndex =
2712 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2713 ->getFrameIndex();
2714 return MI.getOperand(0).getReg();
2715 }
2716 return Register();
2717}
2718
2720 int &FrameIndex) const {
2721 if (!isFrameLoadOpcode(MI.getOpcode()))
2722 return Register();
2723
2724 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2725 return Reg;
2726
2728 if (hasLoadFromStackSlot(MI, Accesses)) {
2729 if (Accesses.size() > 1)
2730 return Register();
2731
2732 FrameIndex =
2733 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2734 ->getFrameIndex();
2735 return MI.getOperand(0).getReg();
2736 }
2737 return Register();
2738}
2739
2740/// Check all MachineMemOperands for a hint to suppress pairing.
2742 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2743 return MMO->getFlags() & MOSuppressPair;
2744 });
2745}
2746
2747/// Set a flag on the first MachineMemOperand to suppress pairing.
2749 if (MI.memoperands_empty())
2750 return;
2751 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2752}
2753
2754/// Check all MachineMemOperands for a hint that the load/store is strided.
2756 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2757 return MMO->getFlags() & MOStridedAccess;
2758 });
2759}
2760
2762 switch (Opc) {
2763 default:
2764 return false;
2765 case AArch64::STURSi:
2766 case AArch64::STRSpre:
2767 case AArch64::STURDi:
2768 case AArch64::STRDpre:
2769 case AArch64::STURQi:
2770 case AArch64::STRQpre:
2771 case AArch64::STURBBi:
2772 case AArch64::STURHHi:
2773 case AArch64::STURWi:
2774 case AArch64::STRWpre:
2775 case AArch64::STURXi:
2776 case AArch64::STRXpre:
2777 case AArch64::LDURSi:
2778 case AArch64::LDRSpre:
2779 case AArch64::LDURDi:
2780 case AArch64::LDRDpre:
2781 case AArch64::LDURQi:
2782 case AArch64::LDRQpre:
2783 case AArch64::LDURWi:
2784 case AArch64::LDRWpre:
2785 case AArch64::LDURXi:
2786 case AArch64::LDRXpre:
2787 case AArch64::LDRSWpre:
2788 case AArch64::LDURSWi:
2789 case AArch64::LDURHHi:
2790 case AArch64::LDURBBi:
2791 case AArch64::LDURSBWi:
2792 case AArch64::LDURSHWi:
2793 return true;
2794 }
2795}
2796
2797std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2798 switch (Opc) {
2799 default: return {};
2800 case AArch64::PRFMui: return AArch64::PRFUMi;
2801 case AArch64::LDRXui: return AArch64::LDURXi;
2802 case AArch64::LDRWui: return AArch64::LDURWi;
2803 case AArch64::LDRBui: return AArch64::LDURBi;
2804 case AArch64::LDRHui: return AArch64::LDURHi;
2805 case AArch64::LDRSui: return AArch64::LDURSi;
2806 case AArch64::LDRDui: return AArch64::LDURDi;
2807 case AArch64::LDRQui: return AArch64::LDURQi;
2808 case AArch64::LDRBBui: return AArch64::LDURBBi;
2809 case AArch64::LDRHHui: return AArch64::LDURHHi;
2810 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2811 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2812 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2813 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2814 case AArch64::LDRSWui: return AArch64::LDURSWi;
2815 case AArch64::STRXui: return AArch64::STURXi;
2816 case AArch64::STRWui: return AArch64::STURWi;
2817 case AArch64::STRBui: return AArch64::STURBi;
2818 case AArch64::STRHui: return AArch64::STURHi;
2819 case AArch64::STRSui: return AArch64::STURSi;
2820 case AArch64::STRDui: return AArch64::STURDi;
2821 case AArch64::STRQui: return AArch64::STURQi;
2822 case AArch64::STRBBui: return AArch64::STURBBi;
2823 case AArch64::STRHHui: return AArch64::STURHHi;
2824 }
2825}
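
// For example, getUnscaledLdSt(AArch64::LDRXui) returns AArch64::LDURXi;
// opcodes not listed above (e.g. AArch64::LDPXi) yield std::nullopt.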
2826
2828 switch (Opc) {
2829 default:
2830 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2831 case AArch64::ADDG:
2832 case AArch64::LDAPURBi:
2833 case AArch64::LDAPURHi:
2834 case AArch64::LDAPURi:
2835 case AArch64::LDAPURSBWi:
2836 case AArch64::LDAPURSBXi:
2837 case AArch64::LDAPURSHWi:
2838 case AArch64::LDAPURSHXi:
2839 case AArch64::LDAPURSWi:
2840 case AArch64::LDAPURXi:
2841 case AArch64::LDR_PPXI:
2842 case AArch64::LDR_PXI:
2843 case AArch64::LDR_ZXI:
2844 case AArch64::LDR_ZZXI:
2845 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2846 case AArch64::LDR_ZZZXI:
2847 case AArch64::LDR_ZZZZXI:
2848 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2849 case AArch64::LDRBBui:
2850 case AArch64::LDRBui:
2851 case AArch64::LDRDui:
2852 case AArch64::LDRHHui:
2853 case AArch64::LDRHui:
2854 case AArch64::LDRQui:
2855 case AArch64::LDRSBWui:
2856 case AArch64::LDRSBXui:
2857 case AArch64::LDRSHWui:
2858 case AArch64::LDRSHXui:
2859 case AArch64::LDRSui:
2860 case AArch64::LDRSWui:
2861 case AArch64::LDRWui:
2862 case AArch64::LDRXui:
2863 case AArch64::LDURBBi:
2864 case AArch64::LDURBi:
2865 case AArch64::LDURDi:
2866 case AArch64::LDURHHi:
2867 case AArch64::LDURHi:
2868 case AArch64::LDURQi:
2869 case AArch64::LDURSBWi:
2870 case AArch64::LDURSBXi:
2871 case AArch64::LDURSHWi:
2872 case AArch64::LDURSHXi:
2873 case AArch64::LDURSi:
2874 case AArch64::LDURSWi:
2875 case AArch64::LDURWi:
2876 case AArch64::LDURXi:
2877 case AArch64::PRFMui:
2878 case AArch64::PRFUMi:
2879 case AArch64::ST2Gi:
2880 case AArch64::STGi:
2881 case AArch64::STLURBi:
2882 case AArch64::STLURHi:
2883 case AArch64::STLURWi:
2884 case AArch64::STLURXi:
2885 case AArch64::StoreSwiftAsyncContext:
2886 case AArch64::STR_PPXI:
2887 case AArch64::STR_PXI:
2888 case AArch64::STR_ZXI:
2889 case AArch64::STR_ZZXI:
2890 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2891 case AArch64::STR_ZZZXI:
2892 case AArch64::STR_ZZZZXI:
2893 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2894 case AArch64::STRBBui:
2895 case AArch64::STRBui:
2896 case AArch64::STRDui:
2897 case AArch64::STRHHui:
2898 case AArch64::STRHui:
2899 case AArch64::STRQui:
2900 case AArch64::STRSui:
2901 case AArch64::STRWui:
2902 case AArch64::STRXui:
2903 case AArch64::STURBBi:
2904 case AArch64::STURBi:
2905 case AArch64::STURDi:
2906 case AArch64::STURHHi:
2907 case AArch64::STURHi:
2908 case AArch64::STURQi:
2909 case AArch64::STURSi:
2910 case AArch64::STURWi:
2911 case AArch64::STURXi:
2912 case AArch64::STZ2Gi:
2913 case AArch64::STZGi:
2914 case AArch64::TAGPstack:
2915 return 2;
2916 case AArch64::LD1B_D_IMM:
2917 case AArch64::LD1B_H_IMM:
2918 case AArch64::LD1B_IMM:
2919 case AArch64::LD1B_S_IMM:
2920 case AArch64::LD1D_IMM:
2921 case AArch64::LD1H_D_IMM:
2922 case AArch64::LD1H_IMM:
2923 case AArch64::LD1H_S_IMM:
2924 case AArch64::LD1RB_D_IMM:
2925 case AArch64::LD1RB_H_IMM:
2926 case AArch64::LD1RB_IMM:
2927 case AArch64::LD1RB_S_IMM:
2928 case AArch64::LD1RD_IMM:
2929 case AArch64::LD1RH_D_IMM:
2930 case AArch64::LD1RH_IMM:
2931 case AArch64::LD1RH_S_IMM:
2932 case AArch64::LD1RSB_D_IMM:
2933 case AArch64::LD1RSB_H_IMM:
2934 case AArch64::LD1RSB_S_IMM:
2935 case AArch64::LD1RSH_D_IMM:
2936 case AArch64::LD1RSH_S_IMM:
2937 case AArch64::LD1RSW_IMM:
2938 case AArch64::LD1RW_D_IMM:
2939 case AArch64::LD1RW_IMM:
2940 case AArch64::LD1SB_D_IMM:
2941 case AArch64::LD1SB_H_IMM:
2942 case AArch64::LD1SB_S_IMM:
2943 case AArch64::LD1SH_D_IMM:
2944 case AArch64::LD1SH_S_IMM:
2945 case AArch64::LD1SW_D_IMM:
2946 case AArch64::LD1W_D_IMM:
2947 case AArch64::LD1W_IMM:
2948 case AArch64::LD2B_IMM:
2949 case AArch64::LD2D_IMM:
2950 case AArch64::LD2H_IMM:
2951 case AArch64::LD2W_IMM:
2952 case AArch64::LD3B_IMM:
2953 case AArch64::LD3D_IMM:
2954 case AArch64::LD3H_IMM:
2955 case AArch64::LD3W_IMM:
2956 case AArch64::LD4B_IMM:
2957 case AArch64::LD4D_IMM:
2958 case AArch64::LD4H_IMM:
2959 case AArch64::LD4W_IMM:
2960 case AArch64::LDG:
2961 case AArch64::LDNF1B_D_IMM:
2962 case AArch64::LDNF1B_H_IMM:
2963 case AArch64::LDNF1B_IMM:
2964 case AArch64::LDNF1B_S_IMM:
2965 case AArch64::LDNF1D_IMM:
2966 case AArch64::LDNF1H_D_IMM:
2967 case AArch64::LDNF1H_IMM:
2968 case AArch64::LDNF1H_S_IMM:
2969 case AArch64::LDNF1SB_D_IMM:
2970 case AArch64::LDNF1SB_H_IMM:
2971 case AArch64::LDNF1SB_S_IMM:
2972 case AArch64::LDNF1SH_D_IMM:
2973 case AArch64::LDNF1SH_S_IMM:
2974 case AArch64::LDNF1SW_D_IMM:
2975 case AArch64::LDNF1W_D_IMM:
2976 case AArch64::LDNF1W_IMM:
2977 case AArch64::LDNPDi:
2978 case AArch64::LDNPQi:
2979 case AArch64::LDNPSi:
2980 case AArch64::LDNPWi:
2981 case AArch64::LDNPXi:
2982 case AArch64::LDNT1B_ZRI:
2983 case AArch64::LDNT1D_ZRI:
2984 case AArch64::LDNT1H_ZRI:
2985 case AArch64::LDNT1W_ZRI:
2986 case AArch64::LDPDi:
2987 case AArch64::LDPQi:
2988 case AArch64::LDPSi:
2989 case AArch64::LDPWi:
2990 case AArch64::LDPXi:
2991 case AArch64::LDRBBpost:
2992 case AArch64::LDRBBpre:
2993 case AArch64::LDRBpost:
2994 case AArch64::LDRBpre:
2995 case AArch64::LDRDpost:
2996 case AArch64::LDRDpre:
2997 case AArch64::LDRHHpost:
2998 case AArch64::LDRHHpre:
2999 case AArch64::LDRHpost:
3000 case AArch64::LDRHpre:
3001 case AArch64::LDRQpost:
3002 case AArch64::LDRQpre:
3003 case AArch64::LDRSpost:
3004 case AArch64::LDRSpre:
3005 case AArch64::LDRWpost:
3006 case AArch64::LDRWpre:
3007 case AArch64::LDRXpost:
3008 case AArch64::LDRXpre:
3009 case AArch64::ST1B_D_IMM:
3010 case AArch64::ST1B_H_IMM:
3011 case AArch64::ST1B_IMM:
3012 case AArch64::ST1B_S_IMM:
3013 case AArch64::ST1D_IMM:
3014 case AArch64::ST1H_D_IMM:
3015 case AArch64::ST1H_IMM:
3016 case AArch64::ST1H_S_IMM:
3017 case AArch64::ST1W_D_IMM:
3018 case AArch64::ST1W_IMM:
3019 case AArch64::ST2B_IMM:
3020 case AArch64::ST2D_IMM:
3021 case AArch64::ST2H_IMM:
3022 case AArch64::ST2W_IMM:
3023 case AArch64::ST3B_IMM:
3024 case AArch64::ST3D_IMM:
3025 case AArch64::ST3H_IMM:
3026 case AArch64::ST3W_IMM:
3027 case AArch64::ST4B_IMM:
3028 case AArch64::ST4D_IMM:
3029 case AArch64::ST4H_IMM:
3030 case AArch64::ST4W_IMM:
3031 case AArch64::STGPi:
3032 case AArch64::STGPreIndex:
3033 case AArch64::STZGPreIndex:
3034 case AArch64::ST2GPreIndex:
3035 case AArch64::STZ2GPreIndex:
3036 case AArch64::STGPostIndex:
3037 case AArch64::STZGPostIndex:
3038 case AArch64::ST2GPostIndex:
3039 case AArch64::STZ2GPostIndex:
3040 case AArch64::STNPDi:
3041 case AArch64::STNPQi:
3042 case AArch64::STNPSi:
3043 case AArch64::STNPWi:
3044 case AArch64::STNPXi:
3045 case AArch64::STNT1B_ZRI:
3046 case AArch64::STNT1D_ZRI:
3047 case AArch64::STNT1H_ZRI:
3048 case AArch64::STNT1W_ZRI:
3049 case AArch64::STPDi:
3050 case AArch64::STPQi:
3051 case AArch64::STPSi:
3052 case AArch64::STPWi:
3053 case AArch64::STPXi:
3054 case AArch64::STRBBpost:
3055 case AArch64::STRBBpre:
3056 case AArch64::STRBpost:
3057 case AArch64::STRBpre:
3058 case AArch64::STRDpost:
3059 case AArch64::STRDpre:
3060 case AArch64::STRHHpost:
3061 case AArch64::STRHHpre:
3062 case AArch64::STRHpost:
3063 case AArch64::STRHpre:
3064 case AArch64::STRQpost:
3065 case AArch64::STRQpre:
3066 case AArch64::STRSpost:
3067 case AArch64::STRSpre:
3068 case AArch64::STRWpost:
3069 case AArch64::STRWpre:
3070 case AArch64::STRXpost:
3071 case AArch64::STRXpre:
3072 return 3;
3073 case AArch64::LDPDpost:
3074 case AArch64::LDPDpre:
3075 case AArch64::LDPQpost:
3076 case AArch64::LDPQpre:
3077 case AArch64::LDPSpost:
3078 case AArch64::LDPSpre:
3079 case AArch64::LDPWpost:
3080 case AArch64::LDPWpre:
3081 case AArch64::LDPXpost:
3082 case AArch64::LDPXpre:
3083 case AArch64::STGPpre:
3084 case AArch64::STGPpost:
3085 case AArch64::STPDpost:
3086 case AArch64::STPDpre:
3087 case AArch64::STPQpost:
3088 case AArch64::STPQpre:
3089 case AArch64::STPSpost:
3090 case AArch64::STPSpre:
3091 case AArch64::STPWpost:
3092 case AArch64::STPWpre:
3093 case AArch64::STPXpost:
3094 case AArch64::STPXpre:
3095 return 4;
3096 }
3097}
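
// For example (from the table above): LDRXui (ldr Xt, [Xn, #imm]) keeps its
// immediate in operand 2, LDPXi (ldp Xt1, Xt2, [Xn, #imm]) in operand 3, and
// the writeback pair forms such as LDPXpre in operand 4.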
3098
3100 switch (MI.getOpcode()) {
3101 default:
3102 return false;
3103 // Scaled instructions.
3104 case AArch64::STRSui:
3105 case AArch64::STRDui:
3106 case AArch64::STRQui:
3107 case AArch64::STRXui:
3108 case AArch64::STRWui:
3109 case AArch64::LDRSui:
3110 case AArch64::LDRDui:
3111 case AArch64::LDRQui:
3112 case AArch64::LDRXui:
3113 case AArch64::LDRWui:
3114 case AArch64::LDRSWui:
3115 // Unscaled instructions.
3116 case AArch64::STURSi:
3117 case AArch64::STRSpre:
3118 case AArch64::STURDi:
3119 case AArch64::STRDpre:
3120 case AArch64::STURQi:
3121 case AArch64::STRQpre:
3122 case AArch64::STURWi:
3123 case AArch64::STRWpre:
3124 case AArch64::STURXi:
3125 case AArch64::STRXpre:
3126 case AArch64::LDURSi:
3127 case AArch64::LDRSpre:
3128 case AArch64::LDURDi:
3129 case AArch64::LDRDpre:
3130 case AArch64::LDURQi:
3131 case AArch64::LDRQpre:
3132 case AArch64::LDURWi:
3133 case AArch64::LDRWpre:
3134 case AArch64::LDURXi:
3135 case AArch64::LDRXpre:
3136 case AArch64::LDURSWi:
3137 case AArch64::LDRSWpre:
3138 // SVE instructions.
3139 case AArch64::LDR_ZXI:
3140 case AArch64::STR_ZXI:
3141 return true;
3142 }
3143}
3144
3146 switch (MI.getOpcode()) {
3147 default:
3148 assert((!MI.isCall() || !MI.isReturn()) &&
3149 "Unexpected instruction - was a new tail call opcode introduced?");
3150 return false;
3151 case AArch64::TCRETURNdi:
3152 case AArch64::TCRETURNri:
3153 case AArch64::TCRETURNrix16x17:
3154 case AArch64::TCRETURNrix17:
3155 case AArch64::TCRETURNrinotx16:
3156 case AArch64::TCRETURNriALL:
3157 case AArch64::AUTH_TCRETURN:
3158 case AArch64::AUTH_TCRETURN_BTI:
3159 return true;
3160 }
3161}
3162
3164 switch (Opc) {
3165 default:
3166 llvm_unreachable("Opcode has no flag setting equivalent!");
3167 // 32-bit cases:
3168 case AArch64::ADDWri:
3169 return AArch64::ADDSWri;
3170 case AArch64::ADDWrr:
3171 return AArch64::ADDSWrr;
3172 case AArch64::ADDWrs:
3173 return AArch64::ADDSWrs;
3174 case AArch64::ADDWrx:
3175 return AArch64::ADDSWrx;
3176 case AArch64::ANDWri:
3177 return AArch64::ANDSWri;
3178 case AArch64::ANDWrr:
3179 return AArch64::ANDSWrr;
3180 case AArch64::ANDWrs:
3181 return AArch64::ANDSWrs;
3182 case AArch64::BICWrr:
3183 return AArch64::BICSWrr;
3184 case AArch64::BICWrs:
3185 return AArch64::BICSWrs;
3186 case AArch64::SUBWri:
3187 return AArch64::SUBSWri;
3188 case AArch64::SUBWrr:
3189 return AArch64::SUBSWrr;
3190 case AArch64::SUBWrs:
3191 return AArch64::SUBSWrs;
3192 case AArch64::SUBWrx:
3193 return AArch64::SUBSWrx;
3194 // 64-bit cases:
3195 case AArch64::ADDXri:
3196 return AArch64::ADDSXri;
3197 case AArch64::ADDXrr:
3198 return AArch64::ADDSXrr;
3199 case AArch64::ADDXrs:
3200 return AArch64::ADDSXrs;
3201 case AArch64::ADDXrx:
3202 return AArch64::ADDSXrx;
3203 case AArch64::ANDXri:
3204 return AArch64::ANDSXri;
3205 case AArch64::ANDXrr:
3206 return AArch64::ANDSXrr;
3207 case AArch64::ANDXrs:
3208 return AArch64::ANDSXrs;
3209 case AArch64::BICXrr:
3210 return AArch64::BICSXrr;
3211 case AArch64::BICXrs:
3212 return AArch64::BICSXrs;
3213 case AArch64::SUBXri:
3214 return AArch64::SUBSXri;
3215 case AArch64::SUBXrr:
3216 return AArch64::SUBSXrr;
3217 case AArch64::SUBXrs:
3218 return AArch64::SUBSXrs;
3219 case AArch64::SUBXrx:
3220 return AArch64::SUBSXrx;
3221 // SVE instructions:
3222 case AArch64::AND_PPzPP:
3223 return AArch64::ANDS_PPzPP;
3224 case AArch64::BIC_PPzPP:
3225 return AArch64::BICS_PPzPP;
3226 case AArch64::EOR_PPzPP:
3227 return AArch64::EORS_PPzPP;
3228 case AArch64::NAND_PPzPP:
3229 return AArch64::NANDS_PPzPP;
3230 case AArch64::NOR_PPzPP:
3231 return AArch64::NORS_PPzPP;
3232 case AArch64::ORN_PPzPP:
3233 return AArch64::ORNS_PPzPP;
3234 case AArch64::ORR_PPzPP:
3235 return AArch64::ORRS_PPzPP;
3236 case AArch64::BRKA_PPzP:
3237 return AArch64::BRKAS_PPzP;
3238 case AArch64::BRKPA_PPzPP:
3239 return AArch64::BRKPAS_PPzPP;
3240 case AArch64::BRKB_PPzP:
3241 return AArch64::BRKBS_PPzP;
3242 case AArch64::BRKPB_PPzPP:
3243 return AArch64::BRKPBS_PPzPP;
3244 case AArch64::BRKN_PPzP:
3245 return AArch64::BRKNS_PPzP;
3246 case AArch64::RDFFR_PPz:
3247 return AArch64::RDFFRS_PPz;
3248 case AArch64::PTRUE_B:
3249 return AArch64::PTRUES_B;
3250 }
3251}
3252
3253// Is this a candidate for ld/st merging or pairing? For example, we don't
3254// touch volatiles or load/stores that have a hint to avoid pair formation.
3256
3257 bool IsPreLdSt = isPreLdSt(MI);
3258
3259 // If this is a volatile load/store, don't mess with it.
3260 if (MI.hasOrderedMemoryRef())
3261 return false;
3262
3263 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3264 // For Pre-inc LD/ST, the operand is shifted by one.
3265 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3266 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3267 "Expected a reg or frame index operand.");
3268
3269 // For Pre-indexed addressing quadword instructions, the third operand is the
3270 // immediate value.
3271 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3272
3273 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3274 return false;
3275
3276 // Can't merge/pair if the instruction modifies the base register.
3277 // e.g., ldr x0, [x0]
3278 // This case will never occur with an FI base.
3279 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3280 // STR<S,D,Q,W,X>pre, it can be merged.
3281 // For example:
3282 // ldr q0, [x11, #32]!
3283 // ldr q1, [x11, #16]
3284 // to
3285 // ldp q0, q1, [x11, #32]!
3286 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3287 Register BaseReg = MI.getOperand(1).getReg();
3289 if (MI.modifiesRegister(BaseReg, TRI))
3290 return false;
3291 }
3292
3293 // Pairing SVE fills/spills is only valid for little-endian targets that
3294 // implement VLS 128.
3295 switch (MI.getOpcode()) {
3296 default:
3297 break;
3298 case AArch64::LDR_ZXI:
3299 case AArch64::STR_ZXI:
3300 if (!Subtarget.isLittleEndian() ||
3301 Subtarget.getSVEVectorSizeInBits() != 128)
3302 return false;
3303 }
3304
3305 // Check if this load/store has a hint to avoid pair formation.
3306 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3308 return false;
3309
3310 // Do not pair any callee-save store/reload instructions in the
3311 // prologue/epilogue if the CFI information encoded the operations as separate
3312 // instructions, as that will cause the size of the actual prologue to mismatch
3313 // with the prologue size recorded in the Windows CFI.
3314 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3315 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3316 MI.getMF()->getFunction().needsUnwindTableEntry();
3317 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3319 return false;
3320
3321 // On some CPUs quad load/store pairs are slower than two single load/stores.
3322 if (Subtarget.isPaired128Slow()) {
3323 switch (MI.getOpcode()) {
3324 default:
3325 break;
3326 case AArch64::LDURQi:
3327 case AArch64::STURQi:
3328 case AArch64::LDRQui:
3329 case AArch64::STRQui:
3330 return false;
3331 }
3332 }
3333
3334 return true;
3335}
3336
3339 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3340 const TargetRegisterInfo *TRI) const {
3341 if (!LdSt.mayLoadOrStore())
3342 return false;
3343
3344 const MachineOperand *BaseOp;
3345 TypeSize WidthN(0, false);
3346 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3347 WidthN, TRI))
3348 return false;
3349  // The maximum vscale is 16 under AArch64, so return the maximal extent for
3350  // the vector.
3351 Width = LocationSize::precise(WidthN);
3352 BaseOps.push_back(BaseOp);
3353 return true;
3354}
3355
3356std::optional<ExtAddrMode>
3358 const TargetRegisterInfo *TRI) const {
3359 const MachineOperand *Base; // Filled with the base operand of MI.
3360 int64_t Offset; // Filled with the offset of MI.
3361 bool OffsetIsScalable;
3362 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3363 return std::nullopt;
3364
3365 if (!Base->isReg())
3366 return std::nullopt;
3367 ExtAddrMode AM;
3368 AM.BaseReg = Base->getReg();
3369 AM.Displacement = Offset;
3370 AM.ScaledReg = 0;
3371 AM.Scale = 0;
3372 return AM;
3373}
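
// For example (illustrative): for 'ldr x0, [x1, #16]' this returns an
// ExtAddrMode with BaseReg = x1, Displacement = 16 (in bytes) and no scaled
// register.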
3374
3376 Register Reg,
3377 const MachineInstr &AddrI,
3378 ExtAddrMode &AM) const {
3379 // Filter out instructions into which we cannot fold.
3380 unsigned NumBytes;
3381 int64_t OffsetScale = 1;
3382 switch (MemI.getOpcode()) {
3383 default:
3384 return false;
3385
3386 case AArch64::LDURQi:
3387 case AArch64::STURQi:
3388 NumBytes = 16;
3389 break;
3390
3391 case AArch64::LDURDi:
3392 case AArch64::STURDi:
3393 case AArch64::LDURXi:
3394 case AArch64::STURXi:
3395 NumBytes = 8;
3396 break;
3397
3398 case AArch64::LDURWi:
3399 case AArch64::LDURSWi:
3400 case AArch64::STURWi:
3401 NumBytes = 4;
3402 break;
3403
3404 case AArch64::LDURHi:
3405 case AArch64::STURHi:
3406 case AArch64::LDURHHi:
3407 case AArch64::STURHHi:
3408 case AArch64::LDURSHXi:
3409 case AArch64::LDURSHWi:
3410 NumBytes = 2;
3411 break;
3412
3413 case AArch64::LDRBroX:
3414 case AArch64::LDRBBroX:
3415 case AArch64::LDRSBXroX:
3416 case AArch64::LDRSBWroX:
3417 case AArch64::STRBroX:
3418 case AArch64::STRBBroX:
3419 case AArch64::LDURBi:
3420 case AArch64::LDURBBi:
3421 case AArch64::LDURSBXi:
3422 case AArch64::LDURSBWi:
3423 case AArch64::STURBi:
3424 case AArch64::STURBBi:
3425 case AArch64::LDRBui:
3426 case AArch64::LDRBBui:
3427 case AArch64::LDRSBXui:
3428 case AArch64::LDRSBWui:
3429 case AArch64::STRBui:
3430 case AArch64::STRBBui:
3431 NumBytes = 1;
3432 break;
3433
3434 case AArch64::LDRQroX:
3435 case AArch64::STRQroX:
3436 case AArch64::LDRQui:
3437 case AArch64::STRQui:
3438 NumBytes = 16;
3439 OffsetScale = 16;
3440 break;
3441
3442 case AArch64::LDRDroX:
3443 case AArch64::STRDroX:
3444 case AArch64::LDRXroX:
3445 case AArch64::STRXroX:
3446 case AArch64::LDRDui:
3447 case AArch64::STRDui:
3448 case AArch64::LDRXui:
3449 case AArch64::STRXui:
3450 NumBytes = 8;
3451 OffsetScale = 8;
3452 break;
3453
3454 case AArch64::LDRWroX:
3455 case AArch64::LDRSWroX:
3456 case AArch64::STRWroX:
3457 case AArch64::LDRWui:
3458 case AArch64::LDRSWui:
3459 case AArch64::STRWui:
3460 NumBytes = 4;
3461 OffsetScale = 4;
3462 break;
3463
3464 case AArch64::LDRHroX:
3465 case AArch64::STRHroX:
3466 case AArch64::LDRHHroX:
3467 case AArch64::STRHHroX:
3468 case AArch64::LDRSHXroX:
3469 case AArch64::LDRSHWroX:
3470 case AArch64::LDRHui:
3471 case AArch64::STRHui:
3472 case AArch64::LDRHHui:
3473 case AArch64::STRHHui:
3474 case AArch64::LDRSHXui:
3475 case AArch64::LDRSHWui:
3476 NumBytes = 2;
3477 OffsetScale = 2;
3478 break;
3479 }
3480
3481 // Check the fold operand is not the loaded/stored value.
3482 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3483 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3484 return false;
3485
3486 // Handle memory instructions with a [Reg, Reg] addressing mode.
3487 if (MemI.getOperand(2).isReg()) {
3488 // Bail if the addressing mode already includes extension of the offset
3489 // register.
3490 if (MemI.getOperand(3).getImm())
3491 return false;
3492
3493 // Check if we actually have a scaled offset.
3494 if (MemI.getOperand(4).getImm() == 0)
3495 OffsetScale = 1;
3496
3497    // If the address instruction is folded into the base register, then the
3498    // addressing mode must not have a scale. Then we can swap the base and the
3499    // scaled registers.
3500 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3501 return false;
3502
3503 switch (AddrI.getOpcode()) {
3504 default:
3505 return false;
3506
3507 case AArch64::SBFMXri:
3508 // sxtw Xa, Wm
3509 // ldr Xd, [Xn, Xa, lsl #N]
3510 // ->
3511 // ldr Xd, [Xn, Wm, sxtw #N]
3512 if (AddrI.getOperand(2).getImm() != 0 ||
3513 AddrI.getOperand(3).getImm() != 31)
3514 return false;
3515
3516 AM.BaseReg = MemI.getOperand(1).getReg();
3517 if (AM.BaseReg == Reg)
3518 AM.BaseReg = MemI.getOperand(2).getReg();
3519 AM.ScaledReg = AddrI.getOperand(1).getReg();
3520 AM.Scale = OffsetScale;
3521 AM.Displacement = 0;
3523 return true;
3524
3525 case TargetOpcode::SUBREG_TO_REG: {
3526 // mov Wa, Wm
3527 // ldr Xd, [Xn, Xa, lsl #N]
3528 // ->
3529 // ldr Xd, [Xn, Wm, uxtw #N]
3530
3531 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3532 if (AddrI.getOperand(1).getImm() != 0 ||
3533 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3534 return false;
3535
3536 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3537 Register OffsetReg = AddrI.getOperand(2).getReg();
3538 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3539 return false;
3540
3541 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3542 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3543 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3544 DefMI.getOperand(3).getImm() != 0)
3545 return false;
3546
3547 AM.BaseReg = MemI.getOperand(1).getReg();
3548 if (AM.BaseReg == Reg)
3549 AM.BaseReg = MemI.getOperand(2).getReg();
3550 AM.ScaledReg = DefMI.getOperand(2).getReg();
3551 AM.Scale = OffsetScale;
3552 AM.Displacement = 0;
3554 return true;
3555 }
3556 }
3557 }
3558
3559 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3560
3561 // Check we are not breaking a potential conversion to an LDP.
3562 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3563 int64_t NewOffset) -> bool {
3564 int64_t MinOffset, MaxOffset;
3565 switch (NumBytes) {
3566 default:
3567 return true;
3568 case 4:
3569 MinOffset = -256;
3570 MaxOffset = 252;
3571 break;
3572 case 8:
3573 MinOffset = -512;
3574 MaxOffset = 504;
3575 break;
3576 case 16:
3577 MinOffset = -1024;
3578 MaxOffset = 1008;
3579 break;
3580 }
3581 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3582 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3583 };
3584 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3585 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3586 int64_t NewOffset = OldOffset + Disp;
3587 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3588 return false;
3589 // If the old offset would fit into an LDP, but the new offset wouldn't,
3590 // bail out.
3591 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3592 return false;
3593 AM.BaseReg = AddrI.getOperand(1).getReg();
3594 AM.ScaledReg = 0;
3595 AM.Scale = 0;
3596 AM.Displacement = NewOffset;
3598 return true;
3599 };
3600
3601 auto canFoldAddRegIntoAddrMode =
3602 [&](int64_t Scale,
3604 if (MemI.getOperand(2).getImm() != 0)
3605 return false;
3606 if ((unsigned)Scale != Scale)
3607 return false;
3608 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3609 return false;
3610 AM.BaseReg = AddrI.getOperand(1).getReg();
3611 AM.ScaledReg = AddrI.getOperand(2).getReg();
3612 AM.Scale = Scale;
3613 AM.Displacement = 0;
3614 AM.Form = Form;
3615 return true;
3616 };
3617
3618 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3619 unsigned Opcode = MemI.getOpcode();
3620 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3621 Subtarget.isSTRQroSlow();
3622 };
3623
3624 int64_t Disp = 0;
3625 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3626 switch (AddrI.getOpcode()) {
3627 default:
3628 return false;
3629
3630 case AArch64::ADDXri:
3631 // add Xa, Xn, #N
3632 // ldr Xd, [Xa, #M]
3633 // ->
3634 // ldr Xd, [Xn, #N'+M]
3635 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3636 return canFoldAddSubImmIntoAddrMode(Disp);
3637
3638 case AArch64::SUBXri:
3639 // sub Xa, Xn, #N
3640 // ldr Xd, [Xa, #M]
3641 // ->
3642 // ldr Xd, [Xn, #N'+M]
3643 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3644 return canFoldAddSubImmIntoAddrMode(-Disp);
3645
3646 case AArch64::ADDXrs: {
3647 // add Xa, Xn, Xm, lsl #N
3648 // ldr Xd, [Xa]
3649 // ->
3650 // ldr Xd, [Xn, Xm, lsl #N]
3651
3652 // Don't fold the add if the result would be slower, unless optimising for
3653 // size.
3654 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3655 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3656 return false;
3657 Shift = AArch64_AM::getShiftValue(Shift);
3658 if (!OptSize) {
3659 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3660 return false;
3661 if (avoidSlowSTRQ(MemI))
3662 return false;
3663 }
3664 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3665 }
3666
3667 case AArch64::ADDXrr:
3668 // add Xa, Xn, Xm
3669 // ldr Xd, [Xa]
3670 // ->
3671 // ldr Xd, [Xn, Xm, lsl #0]
3672
3673 // Don't fold the add if the result would be slower, unless optimising for
3674 // size.
3675 if (!OptSize && avoidSlowSTRQ(MemI))
3676 return false;
3677 return canFoldAddRegIntoAddrMode(1);
3678
3679 case AArch64::ADDXrx:
3680 // add Xa, Xn, Wm, {s,u}xtw #N
3681 // ldr Xd, [Xa]
3682 // ->
3683 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3684
3685 // Don't fold the add if the result would be slower, unless optimising for
3686 // size.
3687 if (!OptSize && avoidSlowSTRQ(MemI))
3688 return false;
3689
3690 // Can fold only sign-/zero-extend of a word.
3691 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3692 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3693 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3694 return false;
3695
3696 return canFoldAddRegIntoAddrMode(
3697 1ULL << AArch64_AM::getArithShiftValue(Imm),
3698 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3699 : ExtAddrMode::Formula::ZExtScaledReg);
3700 }
3701}
3702
3703// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3704// return the opcode of an instruction performing the same operation, but using
3705// the [Reg, Reg] addressing mode.
3706static unsigned regOffsetOpcode(unsigned Opcode) {
3707 switch (Opcode) {
3708 default:
3709 llvm_unreachable("Address folding not implemented for instruction");
3710
3711 case AArch64::LDURQi:
3712 case AArch64::LDRQui:
3713 return AArch64::LDRQroX;
3714 case AArch64::STURQi:
3715 case AArch64::STRQui:
3716 return AArch64::STRQroX;
3717 case AArch64::LDURDi:
3718 case AArch64::LDRDui:
3719 return AArch64::LDRDroX;
3720 case AArch64::STURDi:
3721 case AArch64::STRDui:
3722 return AArch64::STRDroX;
3723 case AArch64::LDURXi:
3724 case AArch64::LDRXui:
3725 return AArch64::LDRXroX;
3726 case AArch64::STURXi:
3727 case AArch64::STRXui:
3728 return AArch64::STRXroX;
3729 case AArch64::LDURWi:
3730 case AArch64::LDRWui:
3731 return AArch64::LDRWroX;
3732 case AArch64::LDURSWi:
3733 case AArch64::LDRSWui:
3734 return AArch64::LDRSWroX;
3735 case AArch64::STURWi:
3736 case AArch64::STRWui:
3737 return AArch64::STRWroX;
3738 case AArch64::LDURHi:
3739 case AArch64::LDRHui:
3740 return AArch64::LDRHroX;
3741 case AArch64::STURHi:
3742 case AArch64::STRHui:
3743 return AArch64::STRHroX;
3744 case AArch64::LDURHHi:
3745 case AArch64::LDRHHui:
3746 return AArch64::LDRHHroX;
3747 case AArch64::STURHHi:
3748 case AArch64::STRHHui:
3749 return AArch64::STRHHroX;
3750 case AArch64::LDURSHXi:
3751 case AArch64::LDRSHXui:
3752 return AArch64::LDRSHXroX;
3753 case AArch64::LDURSHWi:
3754 case AArch64::LDRSHWui:
3755 return AArch64::LDRSHWroX;
3756 case AArch64::LDURBi:
3757 case AArch64::LDRBui:
3758 return AArch64::LDRBroX;
3759 case AArch64::LDURBBi:
3760 case AArch64::LDRBBui:
3761 return AArch64::LDRBBroX;
3762 case AArch64::LDURSBXi:
3763 case AArch64::LDRSBXui:
3764 return AArch64::LDRSBXroX;
3765 case AArch64::LDURSBWi:
3766 case AArch64::LDRSBWui:
3767 return AArch64::LDRSBWroX;
3768 case AArch64::STURBi:
3769 case AArch64::STRBui:
3770 return AArch64::STRBroX;
3771 case AArch64::STURBBi:
3772 case AArch64::STRBBui:
3773 return AArch64::STRBBroX;
3774 }
3775}
3776
3777// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3778// the opcode of an instruction performing the same operation, but using the
3779// [Reg, #Imm] addressing mode with scaled offset.
3780unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3781 switch (Opcode) {
3782 default:
3783 llvm_unreachable("Address folding not implemented for instruction");
3784
3785 case AArch64::LDURQi:
3786 Scale = 16;
3787 return AArch64::LDRQui;
3788 case AArch64::STURQi:
3789 Scale = 16;
3790 return AArch64::STRQui;
3791 case AArch64::LDURDi:
3792 Scale = 8;
3793 return AArch64::LDRDui;
3794 case AArch64::STURDi:
3795 Scale = 8;
3796 return AArch64::STRDui;
3797 case AArch64::LDURXi:
3798 Scale = 8;
3799 return AArch64::LDRXui;
3800 case AArch64::STURXi:
3801 Scale = 8;
3802 return AArch64::STRXui;
3803 case AArch64::LDURWi:
3804 Scale = 4;
3805 return AArch64::LDRWui;
3806 case AArch64::LDURSWi:
3807 Scale = 4;
3808 return AArch64::LDRSWui;
3809 case AArch64::STURWi:
3810 Scale = 4;
3811 return AArch64::STRWui;
3812 case AArch64::LDURHi:
3813 Scale = 2;
3814 return AArch64::LDRHui;
3815 case AArch64::STURHi:
3816 Scale = 2;
3817 return AArch64::STRHui;
3818 case AArch64::LDURHHi:
3819 Scale = 2;
3820 return AArch64::LDRHHui;
3821 case AArch64::STURHHi:
3822 Scale = 2;
3823 return AArch64::STRHHui;
3824 case AArch64::LDURSHXi:
3825 Scale = 2;
3826 return AArch64::LDRSHXui;
3827 case AArch64::LDURSHWi:
3828 Scale = 2;
3829 return AArch64::LDRSHWui;
3830 case AArch64::LDURBi:
3831 Scale = 1;
3832 return AArch64::LDRBui;
3833 case AArch64::LDURBBi:
3834 Scale = 1;
3835 return AArch64::LDRBBui;
3836 case AArch64::LDURSBXi:
3837 Scale = 1;
3838 return AArch64::LDRSBXui;
3839 case AArch64::LDURSBWi:
3840 Scale = 1;
3841 return AArch64::LDRSBWui;
3842 case AArch64::STURBi:
3843 Scale = 1;
3844 return AArch64::STRBui;
3845 case AArch64::STURBBi:
3846 Scale = 1;
3847 return AArch64::STRBBui;
3848 case AArch64::LDRQui:
3849 case AArch64::STRQui:
3850 Scale = 16;
3851 return Opcode;
3852 case AArch64::LDRDui:
3853 case AArch64::STRDui:
3854 case AArch64::LDRXui:
3855 case AArch64::STRXui:
3856 Scale = 8;
3857 return Opcode;
3858 case AArch64::LDRWui:
3859 case AArch64::LDRSWui:
3860 case AArch64::STRWui:
3861 Scale = 4;
3862 return Opcode;
3863 case AArch64::LDRHui:
3864 case AArch64::STRHui:
3865 case AArch64::LDRHHui:
3866 case AArch64::STRHHui:
3867 case AArch64::LDRSHXui:
3868 case AArch64::LDRSHWui:
3869 Scale = 2;
3870 return Opcode;
3871 case AArch64::LDRBui:
3872 case AArch64::LDRBBui:
3873 case AArch64::LDRSBXui:
3874 case AArch64::LDRSBWui:
3875 case AArch64::STRBui:
3876 case AArch64::STRBBui:
3877 Scale = 1;
3878 return Opcode;
3879 }
3880}
3881
3882// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3883// the opcode of an instruction performing the same operation, but using the
3884// [Reg, #Imm] addressing mode with unscaled offset.
3885unsigned unscaledOffsetOpcode(unsigned Opcode) {
3886 switch (Opcode) {
3887 default:
3888 llvm_unreachable("Address folding not implemented for instruction");
3889
3890 case AArch64::LDURQi:
3891 case AArch64::STURQi:
3892 case AArch64::LDURDi:
3893 case AArch64::STURDi:
3894 case AArch64::LDURXi:
3895 case AArch64::STURXi:
3896 case AArch64::LDURWi:
3897 case AArch64::LDURSWi:
3898 case AArch64::STURWi:
3899 case AArch64::LDURHi:
3900 case AArch64::STURHi:
3901 case AArch64::LDURHHi:
3902 case AArch64::STURHHi:
3903 case AArch64::LDURSHXi:
3904 case AArch64::LDURSHWi:
3905 case AArch64::LDURBi:
3906 case AArch64::STURBi:
3907 case AArch64::LDURBBi:
3908 case AArch64::STURBBi:
3909 case AArch64::LDURSBWi:
3910 case AArch64::LDURSBXi:
3911 return Opcode;
3912 case AArch64::LDRQui:
3913 return AArch64::LDURQi;
3914 case AArch64::STRQui:
3915 return AArch64::STURQi;
3916 case AArch64::LDRDui:
3917 return AArch64::LDURDi;
3918 case AArch64::STRDui:
3919 return AArch64::STURDi;
3920 case AArch64::LDRXui:
3921 return AArch64::LDURXi;
3922 case AArch64::STRXui:
3923 return AArch64::STURXi;
3924 case AArch64::LDRWui:
3925 return AArch64::LDURWi;
3926 case AArch64::LDRSWui:
3927 return AArch64::LDURSWi;
3928 case AArch64::STRWui:
3929 return AArch64::STURWi;
3930 case AArch64::LDRHui:
3931 return AArch64::LDURHi;
3932 case AArch64::STRHui:
3933 return AArch64::STURHi;
3934 case AArch64::LDRHHui:
3935 return AArch64::LDURHHi;
3936 case AArch64::STRHHui:
3937 return AArch64::STURHHi;
3938 case AArch64::LDRSHXui:
3939 return AArch64::LDURSHXi;
3940 case AArch64::LDRSHWui:
3941 return AArch64::LDURSHWi;
3942 case AArch64::LDRBBui:
3943 return AArch64::LDURBBi;
3944 case AArch64::LDRBui:
3945 return AArch64::LDURBi;
3946 case AArch64::STRBBui:
3947 return AArch64::STURBBi;
3948 case AArch64::STRBui:
3949 return AArch64::STURBi;
3950 case AArch64::LDRSBWui:
3951 return AArch64::LDURSBWi;
3952 case AArch64::LDRSBXui:
3953 return AArch64::LDURSBXi;
3954 }
3955}
3956
3957// Given the opcode of a memory load/store instruction, return the opcode of an
3958// instruction performing the same operation, but using
3959// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3960// offset register.
3961static unsigned offsetExtendOpcode(unsigned Opcode) {
3962 switch (Opcode) {
3963 default:
3964 llvm_unreachable("Address folding not implemented for instruction");
3965
3966 case AArch64::LDRQroX:
3967 case AArch64::LDURQi:
3968 case AArch64::LDRQui:
3969 return AArch64::LDRQroW;
3970 case AArch64::STRQroX:
3971 case AArch64::STURQi:
3972 case AArch64::STRQui:
3973 return AArch64::STRQroW;
3974 case AArch64::LDRDroX:
3975 case AArch64::LDURDi:
3976 case AArch64::LDRDui:
3977 return AArch64::LDRDroW;
3978 case AArch64::STRDroX:
3979 case AArch64::STURDi:
3980 case AArch64::STRDui:
3981 return AArch64::STRDroW;
3982 case AArch64::LDRXroX:
3983 case AArch64::LDURXi:
3984 case AArch64::LDRXui:
3985 return AArch64::LDRXroW;
3986 case AArch64::STRXroX:
3987 case AArch64::STURXi:
3988 case AArch64::STRXui:
3989 return AArch64::STRXroW;
3990 case AArch64::LDRWroX:
3991 case AArch64::LDURWi:
3992 case AArch64::LDRWui:
3993 return AArch64::LDRWroW;
3994 case AArch64::LDRSWroX:
3995 case AArch64::LDURSWi:
3996 case AArch64::LDRSWui:
3997 return AArch64::LDRSWroW;
3998 case AArch64::STRWroX:
3999 case AArch64::STURWi:
4000 case AArch64::STRWui:
4001 return AArch64::STRWroW;
4002 case AArch64::LDRHroX:
4003 case AArch64::LDURHi:
4004 case AArch64::LDRHui:
4005 return AArch64::LDRHroW;
4006 case AArch64::STRHroX:
4007 case AArch64::STURHi:
4008 case AArch64::STRHui:
4009 return AArch64::STRHroW;
4010 case AArch64::LDRHHroX:
4011 case AArch64::LDURHHi:
4012 case AArch64::LDRHHui:
4013 return AArch64::LDRHHroW;
4014 case AArch64::STRHHroX:
4015 case AArch64::STURHHi:
4016 case AArch64::STRHHui:
4017 return AArch64::STRHHroW;
4018 case AArch64::LDRSHXroX:
4019 case AArch64::LDURSHXi:
4020 case AArch64::LDRSHXui:
4021 return AArch64::LDRSHXroW;
4022 case AArch64::LDRSHWroX:
4023 case AArch64::LDURSHWi:
4024 case AArch64::LDRSHWui:
4025 return AArch64::LDRSHWroW;
4026 case AArch64::LDRBroX:
4027 case AArch64::LDURBi:
4028 case AArch64::LDRBui:
4029 return AArch64::LDRBroW;
4030 case AArch64::LDRBBroX:
4031 case AArch64::LDURBBi:
4032 case AArch64::LDRBBui:
4033 return AArch64::LDRBBroW;
4034 case AArch64::LDRSBXroX:
4035 case AArch64::LDURSBXi:
4036 case AArch64::LDRSBXui:
4037 return AArch64::LDRSBXroW;
4038 case AArch64::LDRSBWroX:
4039 case AArch64::LDURSBWi:
4040 case AArch64::LDRSBWui:
4041 return AArch64::LDRSBWroW;
4042 case AArch64::STRBroX:
4043 case AArch64::STURBi:
4044 case AArch64::STRBui:
4045 return AArch64::STRBroW;
4046 case AArch64::STRBBroX:
4047 case AArch64::STURBBi:
4048 case AArch64::STRBBui:
4049 return AArch64::STRBBroW;
4050 }
4051}
4052
4053 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
4054 const ExtAddrMode &AM) const {
4055
4056 const DebugLoc &DL = MemI.getDebugLoc();
4057 MachineBasicBlock &MBB = *MemI.getParent();
4058 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
4059
4060 if (AM.Form == ExtAddrMode::Formula::Basic) {
4061 if (AM.ScaledReg) {
4062 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4063 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4064 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4065 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4066 .addReg(MemI.getOperand(0).getReg(),
4067 MemI.mayLoad() ? RegState::Define : 0)
4068 .addReg(AM.BaseReg)
4069 .addReg(AM.ScaledReg)
4070 .addImm(0)
4071 .addImm(AM.Scale > 1)
4072 .setMemRefs(MemI.memoperands())
4073 .setMIFlags(MemI.getFlags());
4074 return B.getInstr();
4075 }
4076
4077 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4078 "Addressing mode not supported for folding");
4079
4080 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
4081 unsigned Scale = 1;
4082 unsigned Opcode = MemI.getOpcode();
4083 if (isInt<9>(AM.Displacement))
4084 Opcode = unscaledOffsetOpcode(Opcode);
4085 else
4086 Opcode = scaledOffsetOpcode(Opcode, Scale);
4087
4088 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4089 .addReg(MemI.getOperand(0).getReg(),
4090 MemI.mayLoad() ? RegState::Define : 0)
4091 .addReg(AM.BaseReg)
4092 .addImm(AM.Displacement / Scale)
4093 .setMemRefs(MemI.memoperands())
4094 .setMIFlags(MemI.getFlags());
4095 return B.getInstr();
4096 }
4097
4098 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
4099 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
4100 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4101 assert(AM.ScaledReg && !AM.Displacement &&
4102 "Address offset can be a register or an immediate, but not both");
4103 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4104 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4105 // Make sure the offset register is in the correct register class.
4106 Register OffsetReg = AM.ScaledReg;
4107 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4108 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4109 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4110 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4111 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
4112 }
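// The roW (extended-register) forms take a 32-bit offset register, so a 64-bit
// ScaledReg is narrowed to its W sub-register by the COPY above before being
// used as the extended offset.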
4113 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4114 .addReg(MemI.getOperand(0).getReg(),
4115 MemI.mayLoad() ? RegState::Define : 0)
4116 .addReg(AM.BaseReg)
4117 .addReg(OffsetReg)
4118 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
4119 .addImm(AM.Scale != 1)
4120 .setMemRefs(MemI.memoperands())
4121 .setMIFlags(MemI.getFlags());
4122
4123 return B.getInstr();
4124 }
4125
4126 llvm_unreachable(
4127 "Function must not be called with an addressing mode it can't handle");
4128}
4129
4130/// Return true if the opcode is a post-index ld/st instruction, which really
4131/// loads from base+0.
4132static bool isPostIndexLdStOpcode(unsigned Opcode) {
4133 switch (Opcode) {
4134 default:
4135 return false;
4136 case AArch64::LD1Fourv16b_POST:
4137 case AArch64::LD1Fourv1d_POST:
4138 case AArch64::LD1Fourv2d_POST:
4139 case AArch64::LD1Fourv2s_POST:
4140 case AArch64::LD1Fourv4h_POST:
4141 case AArch64::LD1Fourv4s_POST:
4142 case AArch64::LD1Fourv8b_POST:
4143 case AArch64::LD1Fourv8h_POST:
4144 case AArch64::LD1Onev16b_POST:
4145 case AArch64::LD1Onev1d_POST:
4146 case AArch64::LD1Onev2d_POST:
4147 case AArch64::LD1Onev2s_POST:
4148 case AArch64::LD1Onev4h_POST:
4149 case AArch64::LD1Onev4s_POST:
4150 case AArch64::LD1Onev8b_POST:
4151 case AArch64::LD1Onev8h_POST:
4152 case AArch64::LD1Rv16b_POST:
4153 case AArch64::LD1Rv1d_POST:
4154 case AArch64::LD1Rv2d_POST:
4155 case AArch64::LD1Rv2s_POST:
4156 case AArch64::LD1Rv4h_POST:
4157 case AArch64::LD1Rv4s_POST:
4158 case AArch64::LD1Rv8b_POST:
4159 case AArch64::LD1Rv8h_POST:
4160 case AArch64::LD1Threev16b_POST:
4161 case AArch64::LD1Threev1d_POST:
4162 case AArch64::LD1Threev2d_POST:
4163 case AArch64::LD1Threev2s_POST:
4164 case AArch64::LD1Threev4h_POST:
4165 case AArch64::LD1Threev4s_POST:
4166 case AArch64::LD1Threev8b_POST:
4167 case AArch64::LD1Threev8h_POST:
4168 case AArch64::LD1Twov16b_POST:
4169 case AArch64::LD1Twov1d_POST:
4170 case AArch64::LD1Twov2d_POST:
4171 case AArch64::LD1Twov2s_POST:
4172 case AArch64::LD1Twov4h_POST:
4173 case AArch64::LD1Twov4s_POST:
4174 case AArch64::LD1Twov8b_POST:
4175 case AArch64::LD1Twov8h_POST:
4176 case AArch64::LD1i16_POST:
4177 case AArch64::LD1i32_POST:
4178 case AArch64::LD1i64_POST:
4179 case AArch64::LD1i8_POST:
4180 case AArch64::LD2Rv16b_POST:
4181 case AArch64::LD2Rv1d_POST:
4182 case AArch64::LD2Rv2d_POST:
4183 case AArch64::LD2Rv2s_POST:
4184 case AArch64::LD2Rv4h_POST:
4185 case AArch64::LD2Rv4s_POST:
4186 case AArch64::LD2Rv8b_POST:
4187 case AArch64::LD2Rv8h_POST:
4188 case AArch64::LD2Twov16b_POST:
4189 case AArch64::LD2Twov2d_POST:
4190 case AArch64::LD2Twov2s_POST:
4191 case AArch64::LD2Twov4h_POST:
4192 case AArch64::LD2Twov4s_POST:
4193 case AArch64::LD2Twov8b_POST:
4194 case AArch64::LD2Twov8h_POST:
4195 case AArch64::LD2i16_POST:
4196 case AArch64::LD2i32_POST:
4197 case AArch64::LD2i64_POST:
4198 case AArch64::LD2i8_POST:
4199 case AArch64::LD3Rv16b_POST:
4200 case AArch64::LD3Rv1d_POST:
4201 case AArch64::LD3Rv2d_POST:
4202 case AArch64::LD3Rv2s_POST:
4203 case AArch64::LD3Rv4h_POST:
4204 case AArch64::LD3Rv4s_POST:
4205 case AArch64::LD3Rv8b_POST:
4206 case AArch64::LD3Rv8h_POST:
4207 case AArch64::LD3Threev16b_POST:
4208 case AArch64::LD3Threev2d_POST:
4209 case AArch64::LD3Threev2s_POST:
4210 case AArch64::LD3Threev4h_POST:
4211 case AArch64::LD3Threev4s_POST:
4212 case AArch64::LD3Threev8b_POST:
4213 case AArch64::LD3Threev8h_POST:
4214 case AArch64::LD3i16_POST:
4215 case AArch64::LD3i32_POST:
4216 case AArch64::LD3i64_POST:
4217 case AArch64::LD3i8_POST:
4218 case AArch64::LD4Fourv16b_POST:
4219 case AArch64::LD4Fourv2d_POST:
4220 case AArch64::LD4Fourv2s_POST:
4221 case AArch64::LD4Fourv4h_POST:
4222 case AArch64::LD4Fourv4s_POST:
4223 case AArch64::LD4Fourv8b_POST:
4224 case AArch64::LD4Fourv8h_POST:
4225 case AArch64::LD4Rv16b_POST:
4226 case AArch64::LD4Rv1d_POST:
4227 case AArch64::LD4Rv2d_POST:
4228 case AArch64::LD4Rv2s_POST:
4229 case AArch64::LD4Rv4h_POST:
4230 case AArch64::LD4Rv4s_POST:
4231 case AArch64::LD4Rv8b_POST:
4232 case AArch64::LD4Rv8h_POST:
4233 case AArch64::LD4i16_POST:
4234 case AArch64::LD4i32_POST:
4235 case AArch64::LD4i64_POST:
4236 case AArch64::LD4i8_POST:
4237 case AArch64::LDAPRWpost:
4238 case AArch64::LDAPRXpost:
4239 case AArch64::LDIAPPWpost:
4240 case AArch64::LDIAPPXpost:
4241 case AArch64::LDPDpost:
4242 case AArch64::LDPQpost:
4243 case AArch64::LDPSWpost:
4244 case AArch64::LDPSpost:
4245 case AArch64::LDPWpost:
4246 case AArch64::LDPXpost:
4247 case AArch64::LDRBBpost:
4248 case AArch64::LDRBpost:
4249 case AArch64::LDRDpost:
4250 case AArch64::LDRHHpost:
4251 case AArch64::LDRHpost:
4252 case AArch64::LDRQpost:
4253 case AArch64::LDRSBWpost:
4254 case AArch64::LDRSBXpost:
4255 case AArch64::LDRSHWpost:
4256 case AArch64::LDRSHXpost:
4257 case AArch64::LDRSWpost:
4258 case AArch64::LDRSpost:
4259 case AArch64::LDRWpost:
4260 case AArch64::LDRXpost:
4261 case AArch64::ST1Fourv16b_POST:
4262 case AArch64::ST1Fourv1d_POST:
4263 case AArch64::ST1Fourv2d_POST:
4264 case AArch64::ST1Fourv2s_POST:
4265 case AArch64::ST1Fourv4h_POST:
4266 case AArch64::ST1Fourv4s_POST:
4267 case AArch64::ST1Fourv8b_POST:
4268 case AArch64::ST1Fourv8h_POST:
4269 case AArch64::ST1Onev16b_POST:
4270 case AArch64::ST1Onev1d_POST:
4271 case AArch64::ST1Onev2d_POST:
4272 case AArch64::ST1Onev2s_POST:
4273 case AArch64::ST1Onev4h_POST:
4274 case AArch64::ST1Onev4s_POST:
4275 case AArch64::ST1Onev8b_POST:
4276 case AArch64::ST1Onev8h_POST:
4277 case AArch64::ST1Threev16b_POST:
4278 case AArch64::ST1Threev1d_POST:
4279 case AArch64::ST1Threev2d_POST:
4280 case AArch64::ST1Threev2s_POST:
4281 case AArch64::ST1Threev4h_POST:
4282 case AArch64::ST1Threev4s_POST:
4283 case AArch64::ST1Threev8b_POST:
4284 case AArch64::ST1Threev8h_POST:
4285 case AArch64::ST1Twov16b_POST:
4286 case AArch64::ST1Twov1d_POST:
4287 case AArch64::ST1Twov2d_POST:
4288 case AArch64::ST1Twov2s_POST:
4289 case AArch64::ST1Twov4h_POST:
4290 case AArch64::ST1Twov4s_POST:
4291 case AArch64::ST1Twov8b_POST:
4292 case AArch64::ST1Twov8h_POST:
4293 case AArch64::ST1i16_POST:
4294 case AArch64::ST1i32_POST:
4295 case AArch64::ST1i64_POST:
4296 case AArch64::ST1i8_POST:
4297 case AArch64::ST2GPostIndex:
4298 case AArch64::ST2Twov16b_POST:
4299 case AArch64::ST2Twov2d_POST:
4300 case AArch64::ST2Twov2s_POST:
4301 case AArch64::ST2Twov4h_POST:
4302 case AArch64::ST2Twov4s_POST:
4303 case AArch64::ST2Twov8b_POST:
4304 case AArch64::ST2Twov8h_POST:
4305 case AArch64::ST2i16_POST:
4306 case AArch64::ST2i32_POST:
4307 case AArch64::ST2i64_POST:
4308 case AArch64::ST2i8_POST:
4309 case AArch64::ST3Threev16b_POST:
4310 case AArch64::ST3Threev2d_POST:
4311 case AArch64::ST3Threev2s_POST:
4312 case AArch64::ST3Threev4h_POST:
4313 case AArch64::ST3Threev4s_POST:
4314 case AArch64::ST3Threev8b_POST:
4315 case AArch64::ST3Threev8h_POST:
4316 case AArch64::ST3i16_POST:
4317 case AArch64::ST3i32_POST:
4318 case AArch64::ST3i64_POST:
4319 case AArch64::ST3i8_POST:
4320 case AArch64::ST4Fourv16b_POST:
4321 case AArch64::ST4Fourv2d_POST:
4322 case AArch64::ST4Fourv2s_POST:
4323 case AArch64::ST4Fourv4h_POST:
4324 case AArch64::ST4Fourv4s_POST:
4325 case AArch64::ST4Fourv8b_POST:
4326 case AArch64::ST4Fourv8h_POST:
4327 case AArch64::ST4i16_POST:
4328 case AArch64::ST4i32_POST:
4329 case AArch64::ST4i64_POST:
4330 case AArch64::ST4i8_POST:
4331 case AArch64::STGPostIndex:
4332 case AArch64::STGPpost:
4333 case AArch64::STPDpost:
4334 case AArch64::STPQpost:
4335 case AArch64::STPSpost:
4336 case AArch64::STPWpost:
4337 case AArch64::STPXpost:
4338 case AArch64::STRBBpost:
4339 case AArch64::STRBpost:
4340 case AArch64::STRDpost:
4341 case AArch64::STRHHpost:
4342 case AArch64::STRHpost:
4343 case AArch64::STRQpost:
4344 case AArch64::STRSpost:
4345 case AArch64::STRWpost:
4346 case AArch64::STRXpost:
4347 case AArch64::STZ2GPostIndex:
4348 case AArch64::STZGPostIndex:
4349 return true;
4350 }
4351}
4352
4353 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4354 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4355 bool &OffsetIsScalable, TypeSize &Width,
4356 const TargetRegisterInfo *TRI) const {
4357 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4358 // Handle only loads/stores with base register followed by immediate offset.
4359 if (LdSt.getNumExplicitOperands() == 3) {
4360 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4361 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4362 !LdSt.getOperand(2).isImm())
4363 return false;
4364 } else if (LdSt.getNumExplicitOperands() == 4) {
4365 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4366 if (!LdSt.getOperand(1).isReg() ||
4367 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4368 !LdSt.getOperand(3).isImm())
4369 return false;
4370 } else
4371 return false;
4372
4373 // Get the scaling factor for the instruction and set the width for the
4374 // instruction.
4375 TypeSize Scale(0U, false);
4376 int64_t Dummy1, Dummy2;
4377
4378 // If this returns false, then it's an instruction we don't want to handle.
4379 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4380 return false;
4381
4382 // Compute the offset. The offset is the immediate operand multiplied by the
4383 // scaling factor; unscaled instructions have a scaling factor of 1.
4384 // Post-indexed instructions are a special case and have an offset of 0.
4385 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4386 BaseOp = &LdSt.getOperand(2);
4387 Offset = 0;
4388 } else if (LdSt.getNumExplicitOperands() == 3) {
4389 BaseOp = &LdSt.getOperand(1);
4390 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4391 } else {
4392 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4393 BaseOp = &LdSt.getOperand(2);
4394 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4395 }
4396 OffsetIsScalable = Scale.isScalable();
4397
4398 return BaseOp->isReg() || BaseOp->isFI();
4399}
4400
4401 MachineOperand &
4402 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
4403 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4404 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4405 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4406 return OfsOp;
4407}
4408
4409bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4410 TypeSize &Width, int64_t &MinOffset,
4411 int64_t &MaxOffset) {
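// Scale is the multiplier applied to the immediate operand to obtain a byte
// offset, Width is the number of (possibly scalable) bytes accessed, and
// MinOffset/MaxOffset bound the raw immediate in units of Scale.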
4412 switch (Opcode) {
4413 // Not a memory operation or something we want to handle.
4414 default:
4415 Scale = TypeSize::getFixed(0);
4416 Width = TypeSize::getFixed(0);
4417 MinOffset = MaxOffset = 0;
4418 return false;
4419 // LDR / STR
4420 case AArch64::LDRQui:
4421 case AArch64::STRQui:
4422 Scale = TypeSize::getFixed(16);
4423 Width = TypeSize::getFixed(16);
4424 MinOffset = 0;
4425 MaxOffset = 4095;
4426 break;
4427 case AArch64::LDRXui:
4428 case AArch64::LDRDui:
4429 case AArch64::STRXui:
4430 case AArch64::STRDui:
4431 case AArch64::PRFMui:
4432 Scale = TypeSize::getFixed(8);
4433 Width = TypeSize::getFixed(8);
4434 MinOffset = 0;
4435 MaxOffset = 4095;
4436 break;
4437 case AArch64::LDRWui:
4438 case AArch64::LDRSui:
4439 case AArch64::LDRSWui:
4440 case AArch64::STRWui:
4441 case AArch64::STRSui:
4442 Scale = TypeSize::getFixed(4);
4443 Width = TypeSize::getFixed(4);
4444 MinOffset = 0;
4445 MaxOffset = 4095;
4446 break;
4447 case AArch64::LDRHui:
4448 case AArch64::LDRHHui:
4449 case AArch64::LDRSHWui:
4450 case AArch64::LDRSHXui:
4451 case AArch64::STRHui:
4452 case AArch64::STRHHui:
4453 Scale = TypeSize::getFixed(2);
4454 Width = TypeSize::getFixed(2);
4455 MinOffset = 0;
4456 MaxOffset = 4095;
4457 break;
4458 case AArch64::LDRBui:
4459 case AArch64::LDRBBui:
4460 case AArch64::LDRSBWui:
4461 case AArch64::LDRSBXui:
4462 case AArch64::STRBui:
4463 case AArch64::STRBBui:
4464 Scale = TypeSize::getFixed(1);
4465 Width = TypeSize::getFixed(1);
4466 MinOffset = 0;
4467 MaxOffset = 4095;
4468 break;
4469 // post/pre inc
4470 case AArch64::STRQpre:
4471 case AArch64::LDRQpost:
4472 Scale = TypeSize::getFixed(1);
4473 Width = TypeSize::getFixed(16);
4474 MinOffset = -256;
4475 MaxOffset = 255;
4476 break;
4477 case AArch64::LDRDpost:
4478 case AArch64::LDRDpre:
4479 case AArch64::LDRXpost:
4480 case AArch64::LDRXpre:
4481 case AArch64::STRDpost:
4482 case AArch64::STRDpre:
4483 case AArch64::STRXpost:
4484 case AArch64::STRXpre:
4485 Scale = TypeSize::getFixed(1);
4486 Width = TypeSize::getFixed(8);
4487 MinOffset = -256;
4488 MaxOffset = 255;
4489 break;
4490 case AArch64::STRWpost:
4491 case AArch64::STRWpre:
4492 case AArch64::LDRWpost:
4493 case AArch64::LDRWpre:
4494 case AArch64::STRSpost:
4495 case AArch64::STRSpre:
4496 case AArch64::LDRSpost:
4497 case AArch64::LDRSpre:
4498 Scale = TypeSize::getFixed(1);
4499 Width = TypeSize::getFixed(4);
4500 MinOffset = -256;
4501 MaxOffset = 255;
4502 break;
4503 case AArch64::LDRHpost:
4504 case AArch64::LDRHpre:
4505 case AArch64::STRHpost:
4506 case AArch64::STRHpre:
4507 case AArch64::LDRHHpost:
4508 case AArch64::LDRHHpre:
4509 case AArch64::STRHHpost:
4510 case AArch64::STRHHpre:
4511 Scale = TypeSize::getFixed(1);
4512 Width = TypeSize::getFixed(2);
4513 MinOffset = -256;
4514 MaxOffset = 255;
4515 break;
4516 case AArch64::LDRBpost:
4517 case AArch64::LDRBpre:
4518 case AArch64::STRBpost:
4519 case AArch64::STRBpre:
4520 case AArch64::LDRBBpost:
4521 case AArch64::LDRBBpre:
4522 case AArch64::STRBBpost:
4523 case AArch64::STRBBpre:
4524 Scale = TypeSize::getFixed(1);
4525 Width = TypeSize::getFixed(1);
4526 MinOffset = -256;
4527 MaxOffset = 255;
4528 break;
4529 // Unscaled
4530 case AArch64::LDURQi:
4531 case AArch64::STURQi:
4532 Scale = TypeSize::getFixed(1);
4533 Width = TypeSize::getFixed(16);
4534 MinOffset = -256;
4535 MaxOffset = 255;
4536 break;
4537 case AArch64::LDURXi:
4538 case AArch64::LDURDi:
4539 case AArch64::LDAPURXi:
4540 case AArch64::STURXi:
4541 case AArch64::STURDi:
4542 case AArch64::STLURXi:
4543 case AArch64::PRFUMi:
4544 Scale = TypeSize::getFixed(1);
4545 Width = TypeSize::getFixed(8);
4546 MinOffset = -256;
4547 MaxOffset = 255;
4548 break;
4549 case AArch64::LDURWi:
4550 case AArch64::LDURSi:
4551 case AArch64::LDURSWi:
4552 case AArch64::LDAPURi:
4553 case AArch64::LDAPURSWi:
4554 case AArch64::STURWi:
4555 case AArch64::STURSi:
4556 case AArch64::STLURWi:
4557 Scale = TypeSize::getFixed(1);
4558 Width = TypeSize::getFixed(4);
4559 MinOffset = -256;
4560 MaxOffset = 255;
4561 break;
4562 case AArch64::LDURHi:
4563 case AArch64::LDURHHi:
4564 case AArch64::LDURSHXi:
4565 case AArch64::LDURSHWi:
4566 case AArch64::LDAPURHi:
4567 case AArch64::LDAPURSHWi:
4568 case AArch64::LDAPURSHXi:
4569 case AArch64::STURHi:
4570 case AArch64::STURHHi:
4571 case AArch64::STLURHi:
4572 Scale = TypeSize::getFixed(1);
4573 Width = TypeSize::getFixed(2);
4574 MinOffset = -256;
4575 MaxOffset = 255;
4576 break;
4577 case AArch64::LDURBi:
4578 case AArch64::LDURBBi:
4579 case AArch64::LDURSBXi:
4580 case AArch64::LDURSBWi:
4581 case AArch64::LDAPURBi:
4582 case AArch64::LDAPURSBWi:
4583 case AArch64::LDAPURSBXi:
4584 case AArch64::STURBi:
4585 case AArch64::STURBBi:
4586 case AArch64::STLURBi:
4587 Scale = TypeSize::getFixed(1);
4588 Width = TypeSize::getFixed(1);
4589 MinOffset = -256;
4590 MaxOffset = 255;
4591 break;
4592 // LDP / STP (including pre/post inc)
4593 case AArch64::LDPQi:
4594 case AArch64::LDNPQi:
4595 case AArch64::STPQi:
4596 case AArch64::STNPQi:
4597 case AArch64::LDPQpost:
4598 case AArch64::LDPQpre:
4599 case AArch64::STPQpost:
4600 case AArch64::STPQpre:
4601 Scale = TypeSize::getFixed(16);
4602 Width = TypeSize::getFixed(16 * 2);
4603 MinOffset = -64;
4604 MaxOffset = 63;
4605 break;
4606 case AArch64::LDPXi:
4607 case AArch64::LDPDi:
4608 case AArch64::LDNPXi:
4609 case AArch64::LDNPDi:
4610 case AArch64::STPXi:
4611 case AArch64::STPDi:
4612 case AArch64::STNPXi:
4613 case AArch64::STNPDi:
4614 case AArch64::LDPDpost:
4615 case AArch64::LDPDpre:
4616 case AArch64::LDPXpost:
4617 case AArch64::LDPXpre:
4618 case AArch64::STPDpost:
4619 case AArch64::STPDpre:
4620 case AArch64::STPXpost:
4621 case AArch64::STPXpre:
4622 Scale = TypeSize::getFixed(8);
4623 Width = TypeSize::getFixed(8 * 2);
4624 MinOffset = -64;
4625 MaxOffset = 63;
4626 break;
4627 case AArch64::LDPWi:
4628 case AArch64::LDPSi:
4629 case AArch64::LDNPWi:
4630 case AArch64::LDNPSi:
4631 case AArch64::STPWi:
4632 case AArch64::STPSi:
4633 case AArch64::STNPWi:
4634 case AArch64::STNPSi:
4635 case AArch64::LDPSpost:
4636 case AArch64::LDPSpre:
4637 case AArch64::LDPWpost:
4638 case AArch64::LDPWpre:
4639 case AArch64::STPSpost:
4640 case AArch64::STPSpre:
4641 case AArch64::STPWpost:
4642 case AArch64::STPWpre:
4643 Scale = TypeSize::getFixed(4);
4644 Width = TypeSize::getFixed(4 * 2);
4645 MinOffset = -64;
4646 MaxOffset = 63;
4647 break;
4648 case AArch64::StoreSwiftAsyncContext:
4649 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4650 Scale = TypeSize::getFixed(1);
4651 Width = TypeSize::getFixed(8);
4652 MinOffset = 0;
4653 MaxOffset = 4095;
4654 break;
4655 case AArch64::ADDG:
4656 Scale = TypeSize::getFixed(16);
4657 Width = TypeSize::getFixed(0);
4658 MinOffset = 0;
4659 MaxOffset = 63;
4660 break;
4661 case AArch64::TAGPstack:
4662 Scale = TypeSize::getFixed(16);
4663 Width = TypeSize::getFixed(0);
4664 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4665 // of 63 (not 64!).
4666 MinOffset = -63;
4667 MaxOffset = 63;
4668 break;
4669 case AArch64::LDG:
4670 case AArch64::STGi:
4671 case AArch64::STGPreIndex:
4672 case AArch64::STGPostIndex:
4673 case AArch64::STZGi:
4674 case AArch64::STZGPreIndex:
4675 case AArch64::STZGPostIndex:
4676 Scale = TypeSize::getFixed(16);
4677 Width = TypeSize::getFixed(16);
4678 MinOffset = -256;
4679 MaxOffset = 255;
4680 break;
4681 // SVE
4682 case AArch64::STR_ZZZZXI:
4683 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4684 case AArch64::LDR_ZZZZXI:
4685 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4686 Scale = TypeSize::getScalable(16);
4687 Width = TypeSize::getScalable(16 * 4);
4688 MinOffset = -256;
4689 MaxOffset = 252;
4690 break;
4691 case AArch64::STR_ZZZXI:
4692 case AArch64::LDR_ZZZXI:
4693 Scale = TypeSize::getScalable(16);
4694 Width = TypeSize::getScalable(16 * 3);
4695 MinOffset = -256;
4696 MaxOffset = 253;
4697 break;
4698 case AArch64::STR_ZZXI:
4699 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4700 case AArch64::LDR_ZZXI:
4701 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4702 Scale = TypeSize::getScalable(16);
4703 Width = TypeSize::getScalable(16 * 2);
4704 MinOffset = -256;
4705 MaxOffset = 254;
4706 break;
4707 case AArch64::LDR_PXI:
4708 case AArch64::STR_PXI:
4709 Scale = TypeSize::getScalable(2);
4710 Width = TypeSize::getScalable(2);
4711 MinOffset = -256;
4712 MaxOffset = 255;
4713 break;
4714 case AArch64::LDR_PPXI:
4715 case AArch64::STR_PPXI:
4716 Scale = TypeSize::getScalable(2);
4717 Width = TypeSize::getScalable(2 * 2);
4718 MinOffset = -256;
4719 MaxOffset = 254;
4720 break;
4721 case AArch64::LDR_ZXI:
4722 case AArch64::STR_ZXI:
4723 Scale = TypeSize::getScalable(16);
4724 Width = TypeSize::getScalable(16);
4725 MinOffset = -256;
4726 MaxOffset = 255;
4727 break;
4728 case AArch64::LD1B_IMM:
4729 case AArch64::LD1H_IMM:
4730 case AArch64::LD1W_IMM:
4731 case AArch64::LD1D_IMM:
4732 case AArch64::LDNT1B_ZRI:
4733 case AArch64::LDNT1H_ZRI:
4734 case AArch64::LDNT1W_ZRI:
4735 case AArch64::LDNT1D_ZRI:
4736 case AArch64::ST1B_IMM:
4737 case AArch64::ST1H_IMM:
4738 case AArch64::ST1W_IMM:
4739 case AArch64::ST1D_IMM:
4740 case AArch64::STNT1B_ZRI:
4741 case AArch64::STNT1H_ZRI:
4742 case AArch64::STNT1W_ZRI:
4743 case AArch64::STNT1D_ZRI:
4744 case AArch64::LDNF1B_IMM:
4745 case AArch64::LDNF1H_IMM:
4746 case AArch64::LDNF1W_IMM:
4747 case AArch64::LDNF1D_IMM:
4748 // A full vector's worth of data
4749 // Width = mbytes * elements
4750 Scale = TypeSize::getScalable(16);
4751 Width = TypeSize::getScalable(16);
4752 MinOffset = -8;
4753 MaxOffset = 7;
4754 break;
4755 case AArch64::LD2B_IMM:
4756 case AArch64::LD2H_IMM:
4757 case AArch64::LD2W_IMM:
4758 case AArch64::LD2D_IMM:
4759 case AArch64::ST2B_IMM:
4760 case AArch64::ST2H_IMM:
4761 case AArch64::ST2W_IMM:
4762 case AArch64::ST2D_IMM:
4763 Scale = TypeSize::getScalable(32);
4764 Width = TypeSize::getScalable(16 * 2);
4765 MinOffset = -8;
4766 MaxOffset = 7;
4767 break;
4768 case AArch64::LD3B_IMM:
4769 case AArch64::LD3H_IMM:
4770 case AArch64::LD3W_IMM:
4771 case AArch64::LD3D_IMM:
4772 case AArch64::ST3B_IMM:
4773 case AArch64::ST3H_IMM:
4774 case AArch64::ST3W_IMM:
4775 case AArch64::ST3D_IMM:
4776 Scale = TypeSize::getScalable(48);
4777 Width = TypeSize::getScalable(16 * 3);
4778 MinOffset = -8;
4779 MaxOffset = 7;
4780 break;
4781 case AArch64::LD4B_IMM:
4782 case AArch64::LD4H_IMM:
4783 case AArch64::LD4W_IMM:
4784 case AArch64::LD4D_IMM:
4785 case AArch64::ST4B_IMM:
4786 case AArch64::ST4H_IMM:
4787 case AArch64::ST4W_IMM:
4788 case AArch64::ST4D_IMM:
4789 Scale = TypeSize::getScalable(64);
4790 Width = TypeSize::getScalable(16 * 4);
4791 MinOffset = -8;
4792 MaxOffset = 7;
4793 break;
4794 case AArch64::LD1B_H_IMM:
4795 case AArch64::LD1SB_H_IMM:
4796 case AArch64::LD1H_S_IMM:
4797 case AArch64::LD1SH_S_IMM:
4798 case AArch64::LD1W_D_IMM:
4799 case AArch64::LD1SW_D_IMM:
4800 case AArch64::ST1B_H_IMM:
4801 case AArch64::ST1H_S_IMM:
4802 case AArch64::ST1W_D_IMM:
4803 case AArch64::LDNF1B_H_IMM:
4804 case AArch64::LDNF1SB_H_IMM:
4805 case AArch64::LDNF1H_S_IMM:
4806 case AArch64::LDNF1SH_S_IMM:
4807 case AArch64::LDNF1W_D_IMM:
4808 case AArch64::LDNF1SW_D_IMM:
4809 // A half vector's worth of data
4810 // Width = mbytes * elements
4811 Scale = TypeSize::getScalable(8);
4812 Width = TypeSize::getScalable(8);
4813 MinOffset = -8;
4814 MaxOffset = 7;
4815 break;
4816 case AArch64::LD1B_S_IMM:
4817 case AArch64::LD1SB_S_IMM:
4818 case AArch64::LD1H_D_IMM:
4819 case AArch64::LD1SH_D_IMM:
4820 case AArch64::ST1B_S_IMM:
4821 case AArch64::ST1H_D_IMM:
4822 case AArch64::LDNF1B_S_IMM:
4823 case AArch64::LDNF1SB_S_IMM:
4824 case AArch64::LDNF1H_D_IMM:
4825 case AArch64::LDNF1SH_D_IMM:
4826 // A quarter vector's worth of data
4827 // Width = mbytes * elements
4828 Scale = TypeSize::getScalable(4);
4829 Width = TypeSize::getScalable(4);
4830 MinOffset = -8;
4831 MaxOffset = 7;
4832 break;
4833 case AArch64::LD1B_D_IMM:
4834 case AArch64::LD1SB_D_IMM:
4835 case AArch64::ST1B_D_IMM:
4836 case AArch64::LDNF1B_D_IMM:
4837 case AArch64::LDNF1SB_D_IMM:
4838 // An eighth vector's worth of data
4839 // Width = mbytes * elements
4840 Scale = TypeSize::getScalable(2);
4841 Width = TypeSize::getScalable(2);
4842 MinOffset = -8;
4843 MaxOffset = 7;
4844 break;
4845 case AArch64::ST2Gi:
4846 case AArch64::ST2GPreIndex:
4847 case AArch64::ST2GPostIndex:
4848 case AArch64::STZ2Gi:
4849 case AArch64::STZ2GPreIndex:
4850 case AArch64::STZ2GPostIndex:
4851 Scale = TypeSize::getFixed(16);
4852 Width = TypeSize::getFixed(32);
4853 MinOffset = -256;
4854 MaxOffset = 255;
4855 break;
4856 case AArch64::STGPi:
4857 case AArch64::STGPpost:
4858 case AArch64::STGPpre:
4859 Scale = TypeSize::getFixed(16);
4860 Width = TypeSize::getFixed(16);
4861 MinOffset = -64;
4862 MaxOffset = 63;
4863 break;
4864 case AArch64::LD1RB_IMM:
4865 case AArch64::LD1RB_H_IMM:
4866 case AArch64::LD1RB_S_IMM:
4867 case AArch64::LD1RB_D_IMM:
4868 case AArch64::LD1RSB_H_IMM:
4869 case AArch64::LD1RSB_S_IMM:
4870 case AArch64::LD1RSB_D_IMM:
4871 Scale = TypeSize::getFixed(1);
4872 Width = TypeSize::getFixed(1);
4873 MinOffset = 0;
4874 MaxOffset = 63;
4875 break;
4876 case AArch64::LD1RH_IMM:
4877 case AArch64::LD1RH_S_IMM:
4878 case AArch64::LD1RH_D_IMM:
4879 case AArch64::LD1RSH_S_IMM:
4880 case AArch64::LD1RSH_D_IMM:
4881 Scale = TypeSize::getFixed(2);
4882 Width = TypeSize::getFixed(2);
4883 MinOffset = 0;
4884 MaxOffset = 63;
4885 break;
4886 case AArch64::LD1RW_IMM:
4887 case AArch64::LD1RW_D_IMM:
4888 case AArch64::LD1RSW_IMM:
4889 Scale = TypeSize::getFixed(4);
4890 Width = TypeSize::getFixed(4);
4891 MinOffset = 0;
4892 MaxOffset = 63;
4893 break;
4894 case AArch64::LD1RD_IMM:
4895 Scale = TypeSize::getFixed(8);
4896 Width = TypeSize::getFixed(8);
4897 MinOffset = 0;
4898 MaxOffset = 63;
4899 break;
4900 }
4901
4902 return true;
4903}
4904
4905 // Scaling factor for unscaled load or store.
4906 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4907 switch (Opc) {
4908 default:
4909 llvm_unreachable("Opcode has unknown scale!");
4910 case AArch64::LDRBBui:
4911 case AArch64::LDURBBi:
4912 case AArch64::LDRSBWui:
4913 case AArch64::LDURSBWi:
4914 case AArch64::STRBBui:
4915 case AArch64::STURBBi:
4916 return 1;
4917 case AArch64::LDRHHui:
4918 case AArch64::LDURHHi:
4919 case AArch64::LDRSHWui:
4920 case AArch64::LDURSHWi:
4921 case AArch64::STRHHui:
4922 case AArch64::STURHHi:
4923 return 2;
4924 case AArch64::LDRSui:
4925 case AArch64::LDURSi:
4926 case AArch64::LDRSpre:
4927 case AArch64::LDRSWui:
4928 case AArch64::LDURSWi:
4929 case AArch64::LDRSWpre:
4930 case AArch64::LDRWpre:
4931 case AArch64::LDRWui:
4932 case AArch64::LDURWi:
4933 case AArch64::STRSui:
4934 case AArch64::STURSi:
4935 case AArch64::STRSpre:
4936 case AArch64::STRWui:
4937 case AArch64::STURWi:
4938 case AArch64::STRWpre:
4939 case AArch64::LDPSi:
4940 case AArch64::LDPSWi:
4941 case AArch64::LDPWi:
4942 case AArch64::STPSi:
4943 case AArch64::STPWi:
4944 return 4;
4945 case AArch64::LDRDui:
4946 case AArch64::LDURDi:
4947 case AArch64::LDRDpre:
4948 case AArch64::LDRXui:
4949 case AArch64::LDURXi:
4950 case AArch64::LDRXpre:
4951 case AArch64::STRDui:
4952 case AArch64::STURDi:
4953 case AArch64::STRDpre:
4954 case AArch64::STRXui:
4955 case AArch64::STURXi:
4956 case AArch64::STRXpre:
4957 case AArch64::LDPDi:
4958 case AArch64::LDPXi:
4959 case AArch64::STPDi:
4960 case AArch64::STPXi:
4961 return 8;
4962 case AArch64::LDRQui:
4963 case AArch64::LDURQi:
4964 case AArch64::STRQui:
4965 case AArch64::STURQi:
4966 case AArch64::STRQpre:
4967 case AArch64::LDPQi:
4968 case AArch64::LDRQpre:
4969 case AArch64::STPQi:
4970 case AArch64::STGi:
4971 case AArch64::STZGi:
4972 case AArch64::ST2Gi:
4973 case AArch64::STZ2Gi:
4974 case AArch64::STGPi:
4975 return 16;
4976 }
4977}
4978
4979 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4980 switch (MI.getOpcode()) {
4981 default:
4982 return false;
4983 case AArch64::LDRWpre:
4984 case AArch64::LDRXpre:
4985 case AArch64::LDRSWpre:
4986 case AArch64::LDRSpre:
4987 case AArch64::LDRDpre:
4988 case AArch64::LDRQpre:
4989 return true;
4990 }
4991}
4992
4993 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4994 switch (MI.getOpcode()) {
4995 default:
4996 return false;
4997 case AArch64::STRWpre:
4998 case AArch64::STRXpre:
4999 case AArch64::STRSpre:
5000 case AArch64::STRDpre:
5001 case AArch64::STRQpre:
5002 return true;
5003 }
5004}
5005
5006 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
5007 return isPreLd(MI) || isPreSt(MI);
5008}
5009
5010 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
5011 switch (MI.getOpcode()) {
5012 default:
5013 return false;
5014 case AArch64::LDPSi:
5015 case AArch64::LDPSWi:
5016 case AArch64::LDPDi:
5017 case AArch64::LDPQi:
5018 case AArch64::LDPWi:
5019 case AArch64::LDPXi:
5020 case AArch64::STPSi:
5021 case AArch64::STPDi:
5022 case AArch64::STPQi:
5023 case AArch64::STPWi:
5024 case AArch64::STPXi:
5025 case AArch64::STGPi:
5026 return true;
5027 }
5028}
5029
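// Paired and pre-indexed forms carry an extra leading operand (the second
// data register or the write-back base definition), so the base and offset
// operands sit one index further along in the accessors below.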
5030 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
5031 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5032 unsigned Idx =
5033 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
5034 : 1;
5035 return MI.getOperand(Idx);
5036}
5037
5038 const MachineOperand &
5039 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
5040 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5041 unsigned Idx =
5042 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
5043 : 2;
5044 return MI.getOperand(Idx);
5045}
5046
5047 const MachineOperand &
5048 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
5049 switch (MI.getOpcode()) {
5050 default:
5051 llvm_unreachable("Unexpected opcode");
5052 case AArch64::LDRBroX:
5053 case AArch64::LDRBBroX:
5054 case AArch64::LDRSBXroX:
5055 case AArch64::LDRSBWroX:
5056 case AArch64::LDRHroX:
5057 case AArch64::LDRHHroX:
5058 case AArch64::LDRSHXroX:
5059 case AArch64::LDRSHWroX:
5060 case AArch64::LDRWroX:
5061 case AArch64::LDRSroX:
5062 case AArch64::LDRSWroX:
5063 case AArch64::LDRDroX:
5064 case AArch64::LDRXroX:
5065 case AArch64::LDRQroX:
5066 return MI.getOperand(4);
5067 }
5068}
5069
5070 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
5071 Register Reg) {
5072 if (MI.getParent() == nullptr)
5073 return nullptr;
5074 const MachineFunction *MF = MI.getParent()->getParent();
5075 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5076}
5077
5078 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
5079 auto IsHFPR = [&](const MachineOperand &Op) {
5080 if (!Op.isReg())
5081 return false;
5082 auto Reg = Op.getReg();
5083 if (Reg.isPhysical())
5084 return AArch64::FPR16RegClass.contains(Reg);
5085 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5086 return TRC == &AArch64::FPR16RegClass ||
5087 TRC == &AArch64::FPR16_loRegClass;
5088 };
5089 return llvm::any_of(MI.operands(), IsHFPR);
5090}
5091
5092 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
5093 auto IsQFPR = [&](const MachineOperand &Op) {
5094 if (!Op.isReg())
5095 return false;
5096 auto Reg = Op.getReg();
5097 if (Reg.isPhysical())
5098 return AArch64::FPR128RegClass.contains(Reg);
5099 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5100 return TRC == &AArch64::FPR128RegClass ||
5101 TRC == &AArch64::FPR128_loRegClass;
5102 };
5103 return llvm::any_of(MI.operands(), IsQFPR);
5104}
5105
5106 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
5107 switch (MI.getOpcode()) {
5108 case AArch64::BRK:
5109 case AArch64::HLT:
5110 case AArch64::PACIASP:
5111 case AArch64::PACIBSP:
5112 // Implicit BTI behavior.
5113 return true;
5114 case AArch64::PAUTH_PROLOGUE:
5115 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5116 return true;
5117 case AArch64::HINT: {
5118 unsigned Imm = MI.getOperand(0).getImm();
5119 // Explicit BTI instruction.
5120 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5121 return true;
5122 // PACI(A|B)SP instructions.
5123 if (Imm == 25 || Imm == 27)
5124 return true;
5125 return false;
5126 }
5127 default:
5128 return false;
5129 }
5130}
5131
5132 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
5133 if (Reg == 0)
5134 return false;
5135 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5136 return AArch64::FPR128RegClass.contains(Reg) ||
5137 AArch64::FPR64RegClass.contains(Reg) ||
5138 AArch64::FPR32RegClass.contains(Reg) ||
5139 AArch64::FPR16RegClass.contains(Reg) ||
5140 AArch64::FPR8RegClass.contains(Reg);
5141}
5142
5143 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
5144 auto IsFPR = [&](const MachineOperand &Op) {
5145 if (!Op.isReg())
5146 return false;
5147 auto Reg = Op.getReg();
5148 if (Reg.isPhysical())
5149 return isFpOrNEON(Reg);
5150
5151 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5152 return TRC == &AArch64::FPR128RegClass ||
5153 TRC == &AArch64::FPR128_loRegClass ||
5154 TRC == &AArch64::FPR64RegClass ||
5155 TRC == &AArch64::FPR64_loRegClass ||
5156 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5157 TRC == &AArch64::FPR8RegClass;
5158 };
5159 return llvm::any_of(MI.operands(), IsFPR);
5160}
5161
5162// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5163// scaled.
5164 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
5165 int Scale = AArch64InstrInfo::getMemScale(Opc);
5166
5167 // If the byte-offset isn't a multiple of the stride, we can't scale this
5168 // offset.
5169 if (Offset % Scale != 0)
5170 return false;
5171
5172 // Convert the byte-offset used by unscaled into an "element" offset used
5173 // by the scaled pair load/store instructions.
5174 Offset /= Scale;
5175 return true;
5176}
5177
5178static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5179 if (FirstOpc == SecondOpc)
5180 return true;
5181 // We can also pair sign-ext and zero-ext instructions.
5182 switch (FirstOpc) {
5183 default:
5184 return false;
5185 case AArch64::STRSui:
5186 case AArch64::STURSi:
5187 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5188 case AArch64::STRDui:
5189 case AArch64::STURDi:
5190 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5191 case AArch64::STRQui:
5192 case AArch64::STURQi:
5193 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5194 case AArch64::STRWui:
5195 case AArch64::STURWi:
5196 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5197 case AArch64::STRXui:
5198 case AArch64::STURXi:
5199 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5200 case AArch64::LDRSui:
5201 case AArch64::LDURSi:
5202 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5203 case AArch64::LDRDui:
5204 case AArch64::LDURDi:
5205 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5206 case AArch64::LDRQui:
5207 case AArch64::LDURQi:
5208 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5209 case AArch64::LDRWui:
5210 case AArch64::LDURWi:
5211 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5212 case AArch64::LDRSWui:
5213 case AArch64::LDURSWi:
5214 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5215 case AArch64::LDRXui:
5216 case AArch64::LDURXi:
5217 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5218 }
5219 // These instructions can't be paired based on their opcodes.
5220 return false;
5221}
5222
5223static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5224 int64_t Offset1, unsigned Opcode1, int FI2,
5225 int64_t Offset2, unsigned Opcode2) {
5226 // Accesses through fixed stack object frame indices may access a different
5227 // fixed stack slot. Check that the object offsets + offsets match.
5228 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5229 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5230 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5231 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5232 // Convert to scaled object offsets.
5233 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5234 if (ObjectOffset1 % Scale1 != 0)
5235 return false;
5236 ObjectOffset1 /= Scale1;
5237 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5238 if (ObjectOffset2 % Scale2 != 0)
5239 return false;
5240 ObjectOffset2 /= Scale2;
5241 ObjectOffset1 += Offset1;
5242 ObjectOffset2 += Offset2;
5243 return ObjectOffset1 + 1 == ObjectOffset2;
5244 }
5245
5246 return FI1 == FI2;
5247}
5248
5249/// Detect opportunities for ldp/stp formation.
5250///
5251 /// Only called for LdSt for which getMemOperandWithOffset returns true.
5252 bool AArch64InstrInfo::shouldClusterMemOps(
5253 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5254 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5255 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5256 unsigned NumBytes) const {
5257 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5258 const MachineOperand &BaseOp1 = *BaseOps1.front();
5259 const MachineOperand &BaseOp2 = *BaseOps2.front();
5260 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5261 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
5262 if (BaseOp1.getType() != BaseOp2.getType())
5263 return false;
5264
5265 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5266 "Only base registers and frame indices are supported.");
5267
5268 // Check for both base regs and base FI.
5269 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5270 return false;
5271
5272 // Only cluster up to a single pair.
5273 if (ClusterSize > 2)
5274 return false;
5275
5276 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5277 return false;
5278
5279 // Can we pair these instructions based on their opcodes?
5280 unsigned FirstOpc = FirstLdSt.getOpcode();
5281 unsigned SecondOpc = SecondLdSt.getOpcode();
5282 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5283 return false;
5284
5285 // Can't merge volatiles or load/stores that have a hint to avoid pair
5286 // formation, for example.
5287 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5288 !isCandidateToMergeOrPair(SecondLdSt))
5289 return false;
5290
5291 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
5292 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5293 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5294 return false;
5295
5296 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5297 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5298 return false;
5299
5300 // Pairwise instructions have a 7-bit signed offset field.
5301 if (Offset1 > 63 || Offset1 < -64)
5302 return false;
5303
5304 // The caller should already have ordered First/SecondLdSt by offset.
5305 // Note: except for non-equal frame index bases
5306 if (BaseOp1.isFI()) {
5307 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5308 "Caller should have ordered offsets.");
5309
5310 const MachineFrameInfo &MFI =
5311 FirstLdSt.getParent()->getParent()->getFrameInfo();
5312 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5313 BaseOp2.getIndex(), Offset2, SecondOpc);
5314 }
5315
5316 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5317
5318 return Offset1 + 1 == Offset2;
5319}
5320
5321 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5322 MCRegister Reg, unsigned SubIdx,
5323 unsigned State,
5324 const TargetRegisterInfo *TRI) {
5325 if (!SubIdx)
5326 return MIB.addReg(Reg, State);
5327
5328 if (Reg.isPhysical())
5329 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5330 return MIB.addReg(Reg, State, SubIdx);
5331}
5332
5333static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5334 unsigned NumRegs) {
5335 // We really want the positive remainder mod 32 here, which happens to be
5336 // easily obtainable with a mask.
5337 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5338}
5339
5340 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5341 MachineBasicBlock::iterator I,
5342 const DebugLoc &DL, MCRegister DestReg,
5343 MCRegister SrcReg, bool KillSrc,
5344 unsigned Opcode,
5345 ArrayRef<unsigned> Indices) const {
5346 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5347 const TargetRegisterInfo *TRI = &getRegisterInfo();
5348 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5349 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5350 unsigned NumRegs = Indices.size();
5351
5352 int SubReg = 0, End = NumRegs, Incr = 1;
5353 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5354 SubReg = NumRegs - 1;
5355 End = -1;
5356 Incr = -1;
5357 }
5358
5359 for (; SubReg != End; SubReg += Incr) {
5360 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5361 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5362 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5363 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5364 }
5365}
5366
5367 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5368 MachineBasicBlock::iterator I,
5369 const DebugLoc &DL, MCRegister DestReg,
5370 MCRegister SrcReg, bool KillSrc,
5371 unsigned Opcode, unsigned ZeroReg,
5372 llvm::ArrayRef<unsigned> Indices) const {
5373 const TargetRegisterInfo *TRI = &getRegisterInfo();
5374 unsigned NumRegs = Indices.size();
5375
5376#ifndef NDEBUG
5377 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5378 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5379 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5380 "GPR reg sequences should not be able to overlap");
5381#endif
5382
5383 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5384 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5385 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5386 MIB.addReg(ZeroReg);
5387 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5388 MIB.addImm(0);
5389 }
5390}
5391
5392 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5393 MachineBasicBlock::iterator I,
5394 const DebugLoc &DL, Register DestReg,
5395 Register SrcReg, bool KillSrc,
5396 bool RenamableDest,
5397 bool RenamableSrc) const {
5398 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5399 AArch64::GPR32spRegClass.contains(SrcReg)) {
5400 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5401 // If either operand is WSP, expand to ADD #0.
5402 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5403 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5404 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5405 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5406 &AArch64::GPR64spRegClass);
5407 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5408 &AArch64::GPR64spRegClass);
5409 // This instruction is reading and writing X registers. This may upset
5410 // the register scavenger and machine verifier, so we need to indicate
5411 // that we are reading an undefined value from SrcRegX, but a proper
5412 // value from SrcReg.
5413 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5414 .addReg(SrcRegX, RegState::Undef)
5415 .addImm(0)
5416 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5417 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5418 } else {
5419 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5420 .addReg(SrcReg, getKillRegState(KillSrc))
5421 .addImm(0)
5422 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5423 }
5424 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5425 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5426 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5427 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5428 &AArch64::GPR64spRegClass);
5429 assert(DestRegX.isValid() && "Destination super-reg not valid");
5430 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5431 &AArch64::GPR64spRegClass);
5432 assert(SrcRegX.isValid() && "Source super-reg not valid");
5433 // This instruction is reading and writing X registers. This may upset
5434 // the register scavenger and machine verifier, so we need to indicate
5435 // that we are reading an undefined value from SrcRegX, but a proper
5436 // value from SrcReg.
5437 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5438 .addReg(AArch64::XZR)
5439 .addReg(SrcRegX, RegState::Undef)
5440 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5441 } else {
5442 // Otherwise, expand to ORR WZR.
5443 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5444 .addReg(AArch64::WZR)
5445 .addReg(SrcReg, getKillRegState(KillSrc));
5446 }
5447 return;
5448 }
5449
5450 // GPR32 zeroing
5451 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5452 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5453 !Subtarget.hasZeroCycleZeroingGPR32()) {
5454 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5455 &AArch64::GPR64spRegClass);
5456 assert(DestRegX.isValid() && "Destination super-reg not valid");
5457 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5458 .addImm(0)
5459 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5460 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5461 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5462 .addImm(0)
5463 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5464 } else {
5465 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5466 .addReg(AArch64::WZR)
5467 .addReg(AArch64::WZR);
5468 }
5469 return;
5470 }
5471
5472 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5473 AArch64::GPR64spRegClass.contains(SrcReg)) {
5474 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5475 // If either operand is SP, expand to ADD #0.
5476 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5477 .addReg(SrcReg, getKillRegState(KillSrc))
5478 .addImm(0)
5479 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5480 } else {
5481 // Otherwise, expand to ORR XZR.
5482 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5483 .addReg(AArch64::XZR)
5484 .addReg(SrcReg, getKillRegState(KillSrc));
5485 }
5486 return;
5487 }
5488
5489 // GPR64 zeroing
5490 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5491 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5492 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5493 .addImm(0)
5494 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5495 } else {
5496 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5497 .addReg(AArch64::XZR)
5498 .addReg(AArch64::XZR);
5499 }
5500 return;
5501 }
5502
5503 // Copy a Predicate register by ORRing with itself.
5504 if (AArch64::PPRRegClass.contains(DestReg) &&
5505 AArch64::PPRRegClass.contains(SrcReg)) {
5506 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5507 "Unexpected SVE register.");
5508 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5509 .addReg(SrcReg) // Pg
5510 .addReg(SrcReg)
5511 .addReg(SrcReg, getKillRegState(KillSrc));
5512 return;
5513 }
5514
5515 // Copy a predicate-as-counter register by ORRing with itself as if it
5516 // were a regular predicate (mask) register.
5517 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5518 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5519 if (DestIsPNR || SrcIsPNR) {
5520 auto ToPPR = [](MCRegister R) -> MCRegister {
5521 return (R - AArch64::PN0) + AArch64::P0;
5522 };
5523 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5524 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5525
5526 if (PPRSrcReg != PPRDestReg) {
5527 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5528 .addReg(PPRSrcReg) // Pg
5529 .addReg(PPRSrcReg)
5530 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5531 if (DestIsPNR)
5532 NewMI.addDef(DestReg, RegState::Implicit);
5533 }
5534 return;
5535 }
5536
5537 // Copy a Z register by ORRing with itself.
5538 if (AArch64::ZPRRegClass.contains(DestReg) &&
5539 AArch64::ZPRRegClass.contains(SrcReg)) {
5540 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5541 "Unexpected SVE register.");
5542 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5543 .addReg(SrcReg)
5544 .addReg(SrcReg, getKillRegState(KillSrc));
5545 return;
5546 }
5547
5548 // Copy a Z register pair by copying the individual sub-registers.
5549 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5550 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5551 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5552 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5553 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5554 "Unexpected SVE register.");
5555 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5556 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5557 Indices);
5558 return;
5559 }
5560
5561 // Copy a Z register triple by copying the individual sub-registers.
5562 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5563 AArch64::ZPR3RegClass.contains(SrcReg)) {
5564 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5565 "Unexpected SVE register.");
5566 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5567 AArch64::zsub2};
5568 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5569 Indices);
5570 return;
5571 }
5572
5573 // Copy a Z register quad by copying the individual sub-registers.
5574 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5575 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5576 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5577 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5578 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5579 "Unexpected SVE register.");
5580 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5581 AArch64::zsub2, AArch64::zsub3};
5582 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5583 Indices);
5584 return;
5585 }
5586
5587 // Copy a DDDD register quad by copying the individual sub-registers.
5588 if (AArch64::DDDDRegClass.contains(DestReg) &&
5589 AArch64::DDDDRegClass.contains(SrcReg)) {
5590 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5591 AArch64::dsub2, AArch64::dsub3};
5592 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5593 Indices);
5594 return;
5595 }
5596
5597 // Copy a DDD register triple by copying the individual sub-registers.
5598 if (AArch64::DDDRegClass.contains(DestReg) &&
5599 AArch64::DDDRegClass.contains(SrcReg)) {
5600 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5601 AArch64::dsub2};
5602 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5603 Indices);
5604 return;
5605 }
5606
5607 // Copy a DD register pair by copying the individual sub-registers.
5608 if (AArch64::DDRegClass.contains(DestReg) &&
5609 AArch64::DDRegClass.contains(SrcReg)) {
5610 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5611 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5612 Indices);
5613 return;
5614 }
5615
5616 // Copy a QQQQ register quad by copying the individual sub-registers.
5617 if (AArch64::QQQQRegClass.contains(DestReg) &&
5618 AArch64::QQQQRegClass.contains(SrcReg)) {
5619 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5620 AArch64::qsub2, AArch64::qsub3};
5621 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5622 Indices);
5623 return;
5624 }
5625
5626 // Copy a QQQ register triple by copying the individual sub-registers.
5627 if (AArch64::QQQRegClass.contains(DestReg) &&
5628 AArch64::QQQRegClass.contains(SrcReg)) {
5629 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5630 AArch64::qsub2};
5631 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5632 Indices);
5633 return;
5634 }
5635
5636 // Copy a QQ register pair by copying the individual sub-registers.
5637 if (AArch64::QQRegClass.contains(DestReg) &&
5638 AArch64::QQRegClass.contains(SrcReg)) {
5639 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5640 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5641 Indices);
5642 return;
5643 }
5644
5645 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5646 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5647 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5648 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5649 AArch64::XZR, Indices);
5650 return;
5651 }
5652
5653 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5654 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5655 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5656 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5657 AArch64::WZR, Indices);
5658 return;
5659 }
5660
5661 if (AArch64::FPR128RegClass.contains(DestReg) &&
5662 AArch64::FPR128RegClass.contains(SrcReg)) {
5663 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5664 !Subtarget.isNeonAvailable())
5665 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5666 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5667 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5668 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5669 else if (Subtarget.isNeonAvailable())
5670 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5671 .addReg(SrcReg)
5672 .addReg(SrcReg, getKillRegState(KillSrc));
5673 else {
5674 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5675 .addReg(AArch64::SP, RegState::Define)
5676 .addReg(SrcReg, getKillRegState(KillSrc))
5677 .addReg(AArch64::SP)
5678 .addImm(-16);
5679 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5680 .addReg(AArch64::SP, RegState::Define)
5681 .addReg(DestReg, RegState::Define)
5682 .addReg(AArch64::SP)
5683 .addImm(16);
5684 }
5685 return;
5686 }
5687
5688 if (AArch64::FPR64RegClass.contains(DestReg) &&
5689 AArch64::FPR64RegClass.contains(SrcReg)) {
5690 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5691 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5692 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5693 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5694 &AArch64::FPR128RegClass);
5695 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5696 &AArch64::FPR128RegClass);
5697 // This instruction is reading and writing Q registers. This may upset
5698 // the register scavenger and machine verifier, so we need to indicate
5699 // that we are reading an undefined value from SrcRegQ, but a proper
5700 // value from SrcReg.
5701 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5702 .addReg(SrcRegQ, RegState::Undef)
5703 .addReg(SrcRegQ, RegState::Undef)
5704 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5705 } else {
5706 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5707 .addReg(SrcReg, getKillRegState(KillSrc));
5708 }
5709 return;
5710 }
5711
5712 if (AArch64::FPR32RegClass.contains(DestReg) &&
5713 AArch64::FPR32RegClass.contains(SrcReg)) {
5714 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5715 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5716 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5717 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5718 &AArch64::FPR128RegClass);
5719 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5720 &AArch64::FPR128RegClass);
5721 // This instruction is reading and writing Q registers. This may upset
5722 // the register scavenger and machine verifier, so we need to indicate
5723 // that we are reading an undefined value from SrcRegQ, but a proper
5724 // value from SrcReg.
5725 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5726 .addReg(SrcRegQ, RegState::Undef)
5727 .addReg(SrcRegQ, RegState::Undef)
5728 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5729 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5730 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5731 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5732 &AArch64::FPR64RegClass);
5733 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5734 &AArch64::FPR64RegClass);
5735 // This instruction is reading and writing D registers. This may upset
5736 // the register scavenger and machine verifier, so we need to indicate
5737 // that we are reading an undefined value from SrcRegD, but a proper
5738 // value from SrcReg.
5739 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5740 .addReg(SrcRegD, RegState::Undef)
5741 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5742 } else {
5743 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5744 .addReg(SrcReg, getKillRegState(KillSrc));
5745 }
5746 return;
5747 }
5748
5749 if (AArch64::FPR16RegClass.contains(DestReg) &&
5750 AArch64::FPR16RegClass.contains(SrcReg)) {
5751 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5752 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5753 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5754 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5755 &AArch64::FPR128RegClass);
5756 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5757 &AArch64::FPR128RegClass);
5758 // This instruction is reading and writing Q registers. This may upset
5759 // the register scavenger and machine verifier, so we need to indicate
5760 // that we are reading an undefined value from SrcRegQ, but a proper
5761 // value from SrcReg.
5762 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5763 .addReg(SrcRegQ, RegState::Undef)
5764 .addReg(SrcRegQ, RegState::Undef)
5765 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5766 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5767 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5768 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5769 &AArch64::FPR64RegClass);
5770 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5771 &AArch64::FPR64RegClass);
5772 // This instruction is reading and writing D registers. This may upset
5773 // the register scavenger and machine verifier, so we need to indicate
5774 // that we are reading an undefined value from SrcRegD, but a proper
5775 // value from SrcReg.
5776 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5777 .addReg(SrcRegD, RegState::Undef)
5778 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5779 } else {
5780 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5781 &AArch64::FPR32RegClass);
5782 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5783 &AArch64::FPR32RegClass);
5784 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5785 .addReg(SrcReg, getKillRegState(KillSrc));
5786 }
5787 return;
5788 }
5789
5790 if (AArch64::FPR8RegClass.contains(DestReg) &&
5791 AArch64::FPR8RegClass.contains(SrcReg)) {
5792 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5793 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5794 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5795 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5796 &AArch64::FPR128RegClass);
5797 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5798 &AArch64::FPR128RegClass);
5799 // This instruction is reading and writing Q registers. This may upset
5800 // the register scavenger and machine verifier, so we need to indicate
5801 // that we are reading an undefined value from SrcRegQ, but a proper
5802 // value from SrcReg.
5803 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5804 .addReg(SrcRegQ, RegState::Undef)
5805 .addReg(SrcRegQ, RegState::Undef)
5806 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5807 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5808 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5809 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5810 &AArch64::FPR64RegClass);
5811 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5812 &AArch64::FPR64RegClass);
5813 // This instruction is reading and writing D registers. This may upset
5814 // the register scavenger and machine verifier, so we need to indicate
5815 // that we are reading an undefined value from SrcRegD, but a proper
5816 // value from SrcReg.
5817 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5818 .addReg(SrcRegD, RegState::Undef)
5819 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5820 } else {
5821 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5822 &AArch64::FPR32RegClass);
5823 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5824 &AArch64::FPR32RegClass);
5825 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5826 .addReg(SrcReg, getKillRegState(KillSrc));
5827 }
5828 return;
5829 }
5830
5831 // Copies between GPR64 and FPR64.
5832 if (AArch64::FPR64RegClass.contains(DestReg) &&
5833 AArch64::GPR64RegClass.contains(SrcReg)) {
5834 if (AArch64::XZR == SrcReg) {
5835 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5836 } else {
5837 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5838 .addReg(SrcReg, getKillRegState(KillSrc));
5839 }
5840 return;
5841 }
5842 if (AArch64::GPR64RegClass.contains(DestReg) &&
5843 AArch64::FPR64RegClass.contains(SrcReg)) {
5844 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5845 .addReg(SrcReg, getKillRegState(KillSrc));
5846 return;
5847 }
5848 // Copies between GPR32 and FPR32.
5849 if (AArch64::FPR32RegClass.contains(DestReg) &&
5850 AArch64::GPR32RegClass.contains(SrcReg)) {
5851 if (AArch64::WZR == SrcReg) {
5852 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5853 } else {
5854 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5855 .addReg(SrcReg, getKillRegState(KillSrc));
5856 }
5857 return;
5858 }
5859 if (AArch64::GPR32RegClass.contains(DestReg) &&
5860 AArch64::FPR32RegClass.contains(SrcReg)) {
5861 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5862 .addReg(SrcReg, getKillRegState(KillSrc));
5863 return;
5864 }
5865
5866 if (DestReg == AArch64::NZCV) {
5867 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5868 BuildMI(MBB, I, DL, get(AArch64::MSR))
5869 .addImm(AArch64SysReg::NZCV)
5870 .addReg(SrcReg, getKillRegState(KillSrc))
5871 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5872 return;
5873 }
5874
5875 if (SrcReg == AArch64::NZCV) {
5876 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5877 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5878 .addImm(AArch64SysReg::NZCV)
5879 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5880 return;
5881 }
5882
5883#ifndef NDEBUG
5884 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5885 << "\n";
5886#endif
5887 llvm_unreachable("unimplemented reg-to-reg copy");
5888}
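// For illustration, a few register copies and the expansions the code above
// would typically produce (register choices here are hypothetical, and the
// exact opcode depends on the subtarget features queried above):
//   $w0   = COPY $w1   ->  ORRWrr $w0, $wzr, $w1
//   $x0   = COPY $sp   ->  ADDXri $x0, $sp, 0, 0
//   $q0   = COPY $q1   ->  ORRv16i8 $q0, $q1, $q1      (NEON available)
//   $nzcv = COPY $x0   ->  MSR NZCV, $x0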
5889
5892 MachineBasicBlock::iterator InsertBefore,
5893 const MCInstrDesc &MCID,
5894 Register SrcReg, bool IsKill,
5895 unsigned SubIdx0, unsigned SubIdx1, int FI,
5896 MachineMemOperand *MMO) {
5897 Register SrcReg0 = SrcReg;
5898 Register SrcReg1 = SrcReg;
5899 if (SrcReg.isPhysical()) {
5900 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5901 SubIdx0 = 0;
5902 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5903 SubIdx1 = 0;
5904 }
5905 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5906 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5907 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5908 .addFrameIndex(FI)
5909 .addImm(0)
5910 .addMemOperand(MMO);
5911}
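// A minimal usage sketch (operand values are hypothetical): the XSeqPairs
// spill case below calls this helper with get(AArch64::STPXi) and the
// sube64/subo64 sub-register indices, so spilling a physical pair such as
// $x0_x1 emits roughly:
//   STPXi $x0, $x1, %stack.0, 0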
5912
5915 Register SrcReg, bool isKill, int FI,
5916 const TargetRegisterClass *RC,
5917 Register VReg,
5918 MachineInstr::MIFlag Flags) const {
5919 MachineFunction &MF = *MBB.getParent();
5920 MachineFrameInfo &MFI = MF.getFrameInfo();
5921
5923 MachineMemOperand *MMO =
5925 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5926 unsigned Opc = 0;
5927 bool Offset = true;
5928 Register PNRReg = MCRegister::NoRegister;
5929 unsigned StackID = TargetStackID::Default;
5930 switch (RI.getSpillSize(*RC)) {
5931 case 1:
5932 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5933 Opc = AArch64::STRBui;
5934 break;
5935 case 2: {
5936 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5937 Opc = AArch64::STRHui;
5938 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5939 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5940 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5941 "Unexpected register store without SVE store instructions");
5942 Opc = AArch64::STR_PXI;
5944 }
5945 break;
5946 }
5947 case 4:
5948 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5949 Opc = AArch64::STRWui;
5950 if (SrcReg.isVirtual())
5951 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5952 else
5953 assert(SrcReg != AArch64::WSP);
5954 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5955 Opc = AArch64::STRSui;
5956 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5957 Opc = AArch64::STR_PPXI;
5959 }
5960 break;
5961 case 8:
5962 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5963 Opc = AArch64::STRXui;
5964 if (SrcReg.isVirtual())
5965 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5966 else
5967 assert(SrcReg != AArch64::SP);
5968 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5969 Opc = AArch64::STRDui;
5970 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5972 get(AArch64::STPWi), SrcReg, isKill,
5973 AArch64::sube32, AArch64::subo32, FI, MMO);
5974 return;
5975 }
5976 break;
5977 case 16:
5978 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5979 Opc = AArch64::STRQui;
5980 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5981 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5982 Opc = AArch64::ST1Twov1d;
5983 Offset = false;
5984 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5986 get(AArch64::STPXi), SrcReg, isKill,
5987 AArch64::sube64, AArch64::subo64, FI, MMO);
5988 return;
5989 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5990 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5991 "Unexpected register store without SVE store instructions");
5992 Opc = AArch64::STR_ZXI;
5994 }
5995 break;
5996 case 24:
5997 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5998 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5999 Opc = AArch64::ST1Threev1d;
6000 Offset = false;
6001 }
6002 break;
6003 case 32:
6004 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6005 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6006 Opc = AArch64::ST1Fourv1d;
6007 Offset = false;
6008 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6009 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6010 Opc = AArch64::ST1Twov2d;
6011 Offset = false;
6012 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6013 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6014 "Unexpected register store without SVE store instructions");
6015 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
6017 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6018 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6019 "Unexpected register store without SVE store instructions");
6020 Opc = AArch64::STR_ZZXI;
6022 }
6023 break;
6024 case 48:
6025 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6026 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6027 Opc = AArch64::ST1Threev2d;
6028 Offset = false;
6029 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6030 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6031 "Unexpected register store without SVE store instructions");
6032 Opc = AArch64::STR_ZZZXI;
6034 }
6035 break;
6036 case 64:
6037 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6038 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6039 Opc = AArch64::ST1Fourv2d;
6040 Offset = false;
6041 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6042 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6043 "Unexpected register store without SVE store instructions");
6044 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
6046 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6047 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6048 "Unexpected register store without SVE store instructions");
6049 Opc = AArch64::STR_ZZZZXI;
6051 }
6052 break;
6053 }
6054 assert(Opc && "Unknown register class");
6055 MFI.setStackID(FI, StackID);
6056
6058 .addReg(SrcReg, getKillRegState(isKill))
6059 .addFrameIndex(FI);
6060
6061 if (Offset)
6062 MI.addImm(0);
6063 if (PNRReg.isValid())
6064 MI.addDef(PNRReg, RegState::Implicit);
6065 MI.addMemOperand(MMO);
6066}
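// A couple of illustrative opcode selections (registers and the frame index
// are hypothetical):
//   spilling a GPR64 such as $x19    ->  STRXui  $x19, %stack.0, 0
//   spilling an SVE ZPR such as $z8  ->  STR_ZXI $z8,  %stack.0, 0, with the
//                                        frame index retagged via setStackID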
6067
6070 MachineBasicBlock::iterator InsertBefore,
6071 const MCInstrDesc &MCID,
6072 Register DestReg, unsigned SubIdx0,
6073 unsigned SubIdx1, int FI,
6074 MachineMemOperand *MMO) {
6075 Register DestReg0 = DestReg;
6076 Register DestReg1 = DestReg;
6077 bool IsUndef = true;
6078 if (DestReg.isPhysical()) {
6079 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6080 SubIdx0 = 0;
6081 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6082 SubIdx1 = 0;
6083 IsUndef = false;
6084 }
6085 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6086 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6087 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6088 .addFrameIndex(FI)
6089 .addImm(0)
6090 .addMemOperand(MMO);
6091}
6092
6095 Register DestReg, int FI,
6096 const TargetRegisterClass *RC,
6097 Register VReg,
6098 MachineInstr::MIFlag Flags) const {
6099 MachineFunction &MF = *MBB.getParent();
6100 MachineFrameInfo &MFI = MF.getFrameInfo();
6102 MachineMemOperand *MMO =
6104 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6105
6106 unsigned Opc = 0;
6107 bool Offset = true;
6108 unsigned StackID = TargetStackID::Default;
6109 Register PNRReg = MCRegister::NoRegister;
6110 switch (TRI.getSpillSize(*RC)) {
6111 case 1:
6112 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6113 Opc = AArch64::LDRBui;
6114 break;
6115 case 2: {
6116 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6117 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6118 Opc = AArch64::LDRHui;
6119 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6120 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6121 "Unexpected register load without SVE load instructions");
6122 if (IsPNR)
6123 PNRReg = DestReg;
6124 Opc = AArch64::LDR_PXI;
6126 }
6127 break;
6128 }
6129 case 4:
6130 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6131 Opc = AArch64::LDRWui;
6132 if (DestReg.isVirtual())
6133 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6134 else
6135 assert(DestReg != AArch64::WSP);
6136 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6137 Opc = AArch64::LDRSui;
6138 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6139 Opc = AArch64::LDR_PPXI;
6141 }
6142 break;
6143 case 8:
6144 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6145 Opc = AArch64::LDRXui;
6146 if (DestReg.isVirtual())
6147 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6148 else
6149 assert(DestReg != AArch64::SP);
6150 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6151 Opc = AArch64::LDRDui;
6152 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6154 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6155 AArch64::subo32, FI, MMO);
6156 return;
6157 }
6158 break;
6159 case 16:
6160 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6161 Opc = AArch64::LDRQui;
6162 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6163 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6164 Opc = AArch64::LD1Twov1d;
6165 Offset = false;
6166 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6168 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6169 AArch64::subo64, FI, MMO);
6170 return;
6171 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6172 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6173 "Unexpected register load without SVE load instructions");
6174 Opc = AArch64::LDR_ZXI;
6176 }
6177 break;
6178 case 24:
6179 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6180 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6181 Opc = AArch64::LD1Threev1d;
6182 Offset = false;
6183 }
6184 break;
6185 case 32:
6186 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6187 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6188 Opc = AArch64::LD1Fourv1d;
6189 Offset = false;
6190 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6191 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6192 Opc = AArch64::LD1Twov2d;
6193 Offset = false;
6194 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6195 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6196 "Unexpected register load without SVE load instructions");
6197 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6199 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6200 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6201 "Unexpected register load without SVE load instructions");
6202 Opc = AArch64::LDR_ZZXI;
6204 }
6205 break;
6206 case 48:
6207 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6208 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6209 Opc = AArch64::LD1Threev2d;
6210 Offset = false;
6211 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6212 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6213 "Unexpected register load without SVE load instructions");
6214 Opc = AArch64::LDR_ZZZXI;
6216 }
6217 break;
6218 case 64:
6219 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6220 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6221 Opc = AArch64::LD1Fourv2d;
6222 Offset = false;
6223 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6224 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6225 "Unexpected register load without SVE load instructions");
6226 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6228 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6229 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6230 "Unexpected register load without SVE load instructions");
6231 Opc = AArch64::LDR_ZZZZXI;
6233 }
6234 break;
6235 }
6236
6237 assert(Opc && "Unknown register class");
6238 MFI.setStackID(FI, StackID);
6239
6241 .addReg(DestReg, getDefRegState(true))
6242 .addFrameIndex(FI);
6243 if (Offset)
6244 MI.addImm(0);
6245 if (PNRReg.isValid() && !PNRReg.isVirtual())
6246 MI.addDef(PNRReg, RegState::Implicit);
6247 MI.addMemOperand(MMO);
6248}
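// The reload side mirrors the store above; e.g. (hypothetical registers) a
// GPR64 fill becomes "LDRXui $x19, %stack.0, 0" and an SVE ZPR fill becomes
// "LDR_ZXI $z8, %stack.0, 0".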
6249
6251 const MachineInstr &UseMI,
6252 const TargetRegisterInfo *TRI) {
6253 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6254 UseMI.getIterator()),
6255 [TRI](const MachineInstr &I) {
6256 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6257 I.readsRegister(AArch64::NZCV, TRI);
6258 });
6259}
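// For example (hypothetical MIR), with DefMI = "$w8 = SUBSWrr ..." and
// UseMI = "CSEL ..." consuming NZCV, an intervening "$wzr = SUBSWrr ..."
// redefines NZCV and makes this return true, whereas plain ADDWrr
// instructions in between would not.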
6260
6261void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6262 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6263 // The smallest scalable element supported by scaled SVE addressing
6264 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6265 // byte offset must always be a multiple of 2.
6266 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6267
6268 // VGSized offsets are divided by '2', because the VG register is the
6269 // number of 64-bit granules as opposed to 128-bit vector chunks,
6270 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
6271 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6272 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
6273 ByteSized = Offset.getFixed();
6274 VGSized = Offset.getScalable() / 2;
6275}
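// Worked example (hypothetical values): a StackOffset of 16 fixed bytes plus
// 16 scalable bytes (one SVE data vector) yields ByteSized = 16 and
// VGSized = 8, i.e. the DWARF location is "base + 16 + 8 * VG".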
6276
6277/// Returns the offset in parts to which this frame offset can be
6278/// decomposed for the purpose of describing a frame offset.
6279/// For non-scalable offsets this is simply its byte size.
6280void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6281 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6282 int64_t &NumDataVectors) {
6283 // The smallest scalable element supported by scaled SVE addressing
6284 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6285 // byte offset must always be a multiple of 2.
6286 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6287
6288 NumBytes = Offset.getFixed();
6289 NumDataVectors = 0;
6290 NumPredicateVectors = Offset.getScalable() / 2;
6291 // This method is used to get the offsets to adjust the frame offset.
6292 // If the function requires ADDPL to be used and needs more than two ADDPL
6293 // instructions, part of the offset is folded into NumDataVectors so that it
6294 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
6295 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6296 NumPredicateVectors > 62) {
6297 NumDataVectors = NumPredicateVectors / 8;
6298 NumPredicateVectors -= NumDataVectors * 8;
6299 }
6300}
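// Worked examples (hypothetical values):
//   StackOffset::get(32, 48): NumBytes = 32; the 24 predicate-sized units are
//     divisible by 8, so NumDataVectors = 3 and NumPredicateVectors = 0
//     (one ADDVL instead of several ADDPLs).
//   StackOffset::get(0, 6):   NumPredicateVectors = 3, NumDataVectors = 0.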
6301
6302// Convenience function to create a DWARF expression for: Constant `Operation`.
6303 // This helper emits compact sequences for common cases. For example, for
6304 // `-15 DW_OP_plus`, it would create DW_OP_lit15 DW_OP_minus.
6307 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6308 // -Constant (1 to 31)
6309 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6310 Operation = dwarf::DW_OP_minus;
6311 } else if (Constant >= 0 && Constant <= 31) {
6312 // Literal value 0 to 31
6313 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6314 } else {
6315 // Signed constant
6316 Expr.push_back(dwarf::DW_OP_consts);
6317 appendLEB128<LEB128Sign::Signed>(Expr, Constant);
6318 }
6319 return Expr.push_back(Operation);
6320}
6321
6322// Convenience function to create a DWARF expression for a register.
6323static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6324 Expr.push_back((char)dwarf::DW_OP_bregx);
6325 appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
6326 Expr.push_back(0);
6327}
6328
6329// Convenience function to create a DWARF expression for loading a register from
6330 // a CFA offset.
6331 static void appendLoadRegExpr(SmallVectorImpl<char> &Expr,
6332 int64_t OffsetFromDefCFA) {
6333 // This assumes the top of the DWARF stack contains the CFA.
6334 Expr.push_back(dwarf::DW_OP_dup);
6335 // Add the offset to the register.
6336 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6337 // Dereference the address (loads a 64-bit value).
6338 Expr.push_back(dwarf::DW_OP_deref);
6339}
6340
6341// Convenience function to create a comment for
6342// (+/-) NumBytes (* RegScale)?
6343static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6344 StringRef RegScale = {}) {
6345 if (NumBytes) {
6346 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6347 if (!RegScale.empty())
6348 Comment << ' ' << RegScale;
6349 }
6350}
6351
6352// Creates an MCCFIInstruction:
6353 // { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6354 static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
6355 unsigned Reg,
6356 const StackOffset &Offset) {
6357 int64_t NumBytes, NumVGScaledBytes;
6358 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6359 NumVGScaledBytes);
6360 std::string CommentBuffer;
6361 llvm::raw_string_ostream Comment(CommentBuffer);
6362
6363 if (Reg == AArch64::SP)
6364 Comment << "sp";
6365 else if (Reg == AArch64::FP)
6366 Comment << "fp";
6367 else
6368 Comment << printReg(Reg, &TRI);
6369
6370 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6371 SmallString<64> Expr;
6372 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6373 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6374 // Reg + NumBytes
6375 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6376 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6377 appendOffsetComment(NumBytes, Comment);
6378 if (NumVGScaledBytes) {
6379 // + VG * NumVGScaledBytes
6380 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6381 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6382 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6383 Expr.push_back(dwarf::DW_OP_plus);
6384 }
6385
6386 // Wrap this into DW_CFA_def_cfa.
6387 SmallString<64> DefCfaExpr;
6388 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6389 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6390 DefCfaExpr.append(Expr.str());
6391 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6392 Comment.str());
6393}
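// For example (hypothetical frame layout), Reg = SP with a CFA of
// 16 fixed + 16 scalable bytes encodes roughly:
//   DW_OP_breg31 +16, DW_OP_bregx VG +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// wrapped in DW_CFA_def_cfa_expression, with the comment "sp + 16 + 8 * VG".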
6394
6396 unsigned FrameReg, unsigned Reg,
6397 const StackOffset &Offset,
6398 bool LastAdjustmentWasScalable) {
6399 if (Offset.getScalable())
6400 return createDefCFAExpression(TRI, Reg, Offset);
6401
6402 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6403 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6404
6405 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6406 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6407}
6408
6411 const StackOffset &OffsetFromDefCFA,
6412 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6413 int64_t NumBytes, NumVGScaledBytes;
6414 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6415 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6416
6417 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6418
6419 // Non-scalable offsets can use DW_CFA_offset directly.
6420 if (!NumVGScaledBytes)
6421 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6422
6423 std::string CommentBuffer;
6424 llvm::raw_string_ostream Comment(CommentBuffer);
6425 Comment << printReg(Reg, &TRI) << " @ cfa";
6426
6427 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6428 assert(NumVGScaledBytes && "Expected scalable offset");
6429 SmallString<64> OffsetExpr;
6430 // + VG * NumVGScaledBytes
6431 StringRef VGRegScale;
6432 if (IncomingVGOffsetFromDefCFA) {
6433 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6434 VGRegScale = "* IncomingVG";
6435 } else {
6436 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6437 VGRegScale = "* VG";
6438 }
6439 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6440 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6441 OffsetExpr.push_back(dwarf::DW_OP_plus);
6442 if (NumBytes) {
6443 // + NumBytes
6444 appendOffsetComment(NumBytes, Comment);
6445 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6446 }
6447
6448 // Wrap this into DW_CFA_expression
6449 SmallString<64> CfaExpr;
6450 CfaExpr.push_back(dwarf::DW_CFA_expression);
6451 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6452 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6453 CfaExpr.append(OffsetExpr.str());
6454
6455 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6456 Comment.str());
6457}
6458
6459// Helper function to emit a frame offset adjustment from a given
6460 // pointer (SrcReg), storing the result into DestReg. This helper is
6461 // explicit in that the caller must supply the add/sub opcode.
6464 const DebugLoc &DL, unsigned DestReg,
6465 unsigned SrcReg, int64_t Offset, unsigned Opc,
6466 const TargetInstrInfo *TII,
6467 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6468 bool *HasWinCFI, bool EmitCFAOffset,
6469 StackOffset CFAOffset, unsigned FrameReg) {
6470 int Sign = 1;
6471 unsigned MaxEncoding, ShiftSize;
6472 switch (Opc) {
6473 case AArch64::ADDXri:
6474 case AArch64::ADDSXri:
6475 case AArch64::SUBXri:
6476 case AArch64::SUBSXri:
6477 MaxEncoding = 0xfff;
6478 ShiftSize = 12;
6479 break;
6480 case AArch64::ADDVL_XXI:
6481 case AArch64::ADDPL_XXI:
6482 case AArch64::ADDSVL_XXI:
6483 case AArch64::ADDSPL_XXI:
6484 MaxEncoding = 31;
6485 ShiftSize = 0;
6486 if (Offset < 0) {
6487 MaxEncoding = 32;
6488 Sign = -1;
6489 Offset = -Offset;
6490 }
6491 break;
6492 default:
6493 llvm_unreachable("Unsupported opcode");
6494 }
6495
6496 // `Offset` can be in bytes or in "scalable bytes".
6497 int VScale = 1;
6498 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6499 VScale = 16;
6500 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6501 VScale = 2;
6502
6503 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6504 // scratch register. If DestReg is a virtual register, use it as the
6505 // scratch register; otherwise, create a new virtual register (to be
6506 // replaced by the scavenger at the end of PEI). That case can be optimized
6507 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6508 // register can be loaded with offset%8 and the add/sub can use an extending
6509 // instruction with LSL#3.
6510 // Currently the function handles any offsets but generates a poor sequence
6511 // of code.
6512 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6513
6514 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6515 Register TmpReg = DestReg;
6516 if (TmpReg == AArch64::XZR)
6517 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6518 &AArch64::GPR64RegClass);
6519 do {
6520 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6521 unsigned LocalShiftSize = 0;
6522 if (ThisVal > MaxEncoding) {
6523 ThisVal = ThisVal >> ShiftSize;
6524 LocalShiftSize = ShiftSize;
6525 }
6526 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6527 "Encoding cannot handle value that big");
6528
6529 Offset -= ThisVal << LocalShiftSize;
6530 if (Offset == 0)
6531 TmpReg = DestReg;
6532 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6533 .addReg(SrcReg)
6534 .addImm(Sign * (int)ThisVal);
6535 if (ShiftSize)
6536 MBI = MBI.addImm(
6537 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
6538 MBI = MBI.setMIFlag(Flag);
6539
6540 auto Change =
6541 VScale == 1
6542 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6543 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6544 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6545 CFAOffset += Change;
6546 else
6547 CFAOffset -= Change;
6548 if (EmitCFAOffset && DestReg == TmpReg) {
6549 MachineFunction &MF = *MBB.getParent();
6550 const TargetSubtargetInfo &STI = MF.getSubtarget();
6551 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6552
6553 unsigned CFIIndex = MF.addFrameInst(
6554 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6555 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6556 .addCFIIndex(CFIIndex)
6557 .setMIFlags(Flag);
6558 }
6559
6560 if (NeedsWinCFI) {
6561 int Imm = (int)(ThisVal << LocalShiftSize);
6562 if (VScale != 1 && DestReg == AArch64::SP) {
6563 if (HasWinCFI)
6564 *HasWinCFI = true;
6565 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6566 .addImm(ThisVal)
6567 .setMIFlag(Flag);
6568 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6569 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6570 assert(VScale == 1 && "Expected non-scalable operation");
6571 if (HasWinCFI)
6572 *HasWinCFI = true;
6573 if (Imm == 0)
6574 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6575 else
6576 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6577 .addImm(Imm)
6578 .setMIFlag(Flag);
6579 assert(Offset == 0 && "Expected remaining offset to be zero to "
6580 "emit a single SEH directive");
6581 } else if (DestReg == AArch64::SP) {
6582 assert(VScale == 1 && "Expected non-scalable operation");
6583 if (HasWinCFI)
6584 *HasWinCFI = true;
6585 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6586 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6587 .addImm(Imm)
6588 .setMIFlag(Flag);
6589 }
6590 }
6591
6592 SrcReg = TmpReg;
6593 } while (Offset);
6594}
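// Worked example (hypothetical operands): DestReg = $x9, SrcReg = $sp,
// Offset = 0x101008 and Opc = ADDXri cannot be encoded in a single ADD, so
// the loop above emits:
//   ADD x9, sp, #257, lsl #12
//   ADD x9, x9, #8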
6595
6598 unsigned DestReg, unsigned SrcReg,
6600 MachineInstr::MIFlag Flag, bool SetNZCV,
6601 bool NeedsWinCFI, bool *HasWinCFI,
6602 bool EmitCFAOffset, StackOffset CFAOffset,
6603 unsigned FrameReg) {
6604 // If a function is marked as arm_locally_streaming, then the runtime value of
6605 // vscale in the prologue/epilogue is different from the runtime value of vscale
6606 // in the function's body. To avoid having to consider multiple vscales,
6607 // we can use `addsvl` to allocate any scalable stack-slots, which under
6608 // most circumstances will be only locals, not callee-save slots.
6609 const Function &F = MBB.getParent()->getFunction();
6610 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6611
6612 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6613 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6614 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6615
6616 // Insert ADDSXri for scalable offset at the end.
6617 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6618 if (NeedsFinalDefNZCV)
6619 SetNZCV = false;
6620
6621 // First emit non-scalable frame offsets, or a simple 'mov'.
6622 if (Bytes || (!Offset && SrcReg != DestReg)) {
6623 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6624 "SP increment/decrement not 8-byte aligned");
6625 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6626 if (Bytes < 0) {
6627 Bytes = -Bytes;
6628 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6629 }
6630 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6631 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6632 FrameReg);
6633 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6634 ? StackOffset::getFixed(-Bytes)
6635 : StackOffset::getFixed(Bytes);
6636 SrcReg = DestReg;
6637 FrameReg = DestReg;
6638 }
6639
6640 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6641 "WinCFI can't allocate fractions of an SVE data vector");
6642
6643 if (NumDataVectors) {
6644 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6645 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6646 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6647 FrameReg);
6648 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6649 SrcReg = DestReg;
6650 }
6651
6652 if (NumPredicateVectors) {
6653 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6654 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6655 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6656 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6657 FrameReg);
6658 }
6659
6660 if (NeedsFinalDefNZCV)
6661 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6662 .addReg(DestReg)
6663 .addImm(0)
6664 .addImm(0);
6665}
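// Illustrative expansion (hypothetical operands): adjusting SP by
// StackOffset::get(32, 48) decomposes into Bytes = 32, NumDataVectors = 3 and
// NumPredicateVectors = 0, producing roughly:
//   ADD   sp, sp, #32
//   ADDVL sp, sp, #3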
6666
6669 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6670 LiveIntervals *LIS, VirtRegMap *VRM) const {
6671 // This is a bit of a hack. Consider this instruction:
6672 //
6673 // %0 = COPY %sp; GPR64all:%0
6674 //
6675 // We explicitly chose GPR64all for the virtual register so such a copy might
6676 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6677 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6678 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6679 //
6680 // To prevent that, we are going to constrain the %0 register class here.
6681 if (MI.isFullCopy()) {
6682 Register DstReg = MI.getOperand(0).getReg();
6683 Register SrcReg = MI.getOperand(1).getReg();
6684 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6685 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6686 return nullptr;
6687 }
6688 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6689 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6690 return nullptr;
6691 }
6692 // Nothing can be folded with a copy from/to NZCV.
6693 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6694 return nullptr;
6695 }
6696
6697 // Handle the case where a copy is being spilled or filled but the source
6698 // and destination register class don't match. For example:
6699 //
6700 // %0 = COPY %xzr; GPR64common:%0
6701 //
6702 // In this case we can still safely fold away the COPY and generate the
6703 // following spill code:
6704 //
6705 // STRXui %xzr, %stack.0
6706 //
6707 // This also eliminates spilled cross register class COPYs (e.g. between x and
6708 // d regs) of the same size. For example:
6709 //
6710 // %0 = COPY %1; GPR64:%0, FPR64:%1
6711 //
6712 // will be filled as
6713 //
6714 // LDRDui %0, fi<#0>
6715 //
6716 // instead of
6717 //
6718 // LDRXui %Temp, fi<#0>
6719 // %0 = FMOV %Temp
6720 //
6721 if (MI.isCopy() && Ops.size() == 1 &&
6722 // Make sure we're only folding the explicit COPY defs/uses.
6723 (Ops[0] == 0 || Ops[0] == 1)) {
6724 bool IsSpill = Ops[0] == 0;
6725 bool IsFill = !IsSpill;
6727 const MachineRegisterInfo &MRI = MF.getRegInfo();
6728 MachineBasicBlock &MBB = *MI.getParent();
6729 const MachineOperand &DstMO = MI.getOperand(0);
6730 const MachineOperand &SrcMO = MI.getOperand(1);
6731 Register DstReg = DstMO.getReg();
6732 Register SrcReg = SrcMO.getReg();
6733 // This is slightly expensive to compute for physical regs since
6734 // getMinimalPhysRegClass is slow.
6735 auto getRegClass = [&](unsigned Reg) {
6736 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6737 : TRI.getMinimalPhysRegClass(Reg);
6738 };
6739
6740 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6741 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6742 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6743 "Mismatched register size in non subreg COPY");
6744 if (IsSpill)
6745 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6746 getRegClass(SrcReg), Register());
6747 else
6748 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6749 getRegClass(DstReg), Register());
6750 return &*--InsertPt;
6751 }
6752
6753 // Handle cases like spilling def of:
6754 //
6755 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6756 //
6757 // where the physical register source can be widened and stored to the full
6758 // virtual reg destination stack slot, in this case producing:
6759 //
6760 // STRXui %xzr, %stack.0
6761 //
6762 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6763 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6764 assert(SrcMO.getSubReg() == 0 &&
6765 "Unexpected subreg on physical register");
6766 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6767 FrameIndex, &AArch64::GPR64RegClass, Register());
6768 return &*--InsertPt;
6769 }
6770
6771 // Handle cases like filling use of:
6772 //
6773 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6774 //
6775 // where we can load the full virtual reg source stack slot, into the subreg
6776 // destination, in this case producing:
6777 //
6778 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6779 //
6780 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6781 const TargetRegisterClass *FillRC = nullptr;
6782 switch (DstMO.getSubReg()) {
6783 default:
6784 break;
6785 case AArch64::sub_32:
6786 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6787 FillRC = &AArch64::GPR32RegClass;
6788 break;
6789 case AArch64::ssub:
6790 FillRC = &AArch64::FPR32RegClass;
6791 break;
6792 case AArch64::dsub:
6793 FillRC = &AArch64::FPR64RegClass;
6794 break;
6795 }
6796
6797 if (FillRC) {
6798 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6799 TRI.getRegSizeInBits(*FillRC) &&
6800 "Mismatched regclass size on folded subreg COPY");
6801 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
6802 Register());
6803 MachineInstr &LoadMI = *--InsertPt;
6804 MachineOperand &LoadDst = LoadMI.getOperand(0);
6805 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6806 LoadDst.setSubReg(DstMO.getSubReg());
6807 LoadDst.setIsUndef();
6808 return &LoadMI;
6809 }
6810 }
6811 }
6812
6813 // Cannot fold.
6814 return nullptr;
6815}
6816
6818 StackOffset &SOffset,
6819 bool *OutUseUnscaledOp,
6820 unsigned *OutUnscaledOp,
6821 int64_t *EmittableOffset) {
6822 // Set output values in case of early exit.
6823 if (EmittableOffset)
6824 *EmittableOffset = 0;
6825 if (OutUseUnscaledOp)
6826 *OutUseUnscaledOp = false;
6827 if (OutUnscaledOp)
6828 *OutUnscaledOp = 0;
6829
6830 // Exit early for structured vector spills/fills as they can't take an
6831 // immediate offset.
6832 switch (MI.getOpcode()) {
6833 default:
6834 break;
6835 case AArch64::LD1Rv1d:
6836 case AArch64::LD1Rv2s:
6837 case AArch64::LD1Rv2d:
6838 case AArch64::LD1Rv4h:
6839 case AArch64::LD1Rv4s:
6840 case AArch64::LD1Rv8b:
6841 case AArch64::LD1Rv8h:
6842 case AArch64::LD1Rv16b:
6843 case AArch64::LD1Twov2d:
6844 case AArch64::LD1Threev2d:
6845 case AArch64::LD1Fourv2d:
6846 case AArch64::LD1Twov1d:
6847 case AArch64::LD1Threev1d:
6848 case AArch64::LD1Fourv1d:
6849 case AArch64::ST1Twov2d:
6850 case AArch64::ST1Threev2d:
6851 case AArch64::ST1Fourv2d:
6852 case AArch64::ST1Twov1d:
6853 case AArch64::ST1Threev1d:
6854 case AArch64::ST1Fourv1d:
6855 case AArch64::ST1i8:
6856 case AArch64::ST1i16:
6857 case AArch64::ST1i32:
6858 case AArch64::ST1i64:
6859 case AArch64::IRG:
6860 case AArch64::IRGstack:
6861 case AArch64::STGloop:
6862 case AArch64::STZGloop:
6863 return AArch64FrameOffsetCannotUpdate;
6864 }
6865
6866 // Get the min/max offset and the scale.
6867 TypeSize ScaleValue(0U, false), Width(0U, false);
6868 int64_t MinOff, MaxOff;
6869 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6870 MaxOff))
6871 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6872
6873 // Construct the complete offset.
6874 bool IsMulVL = ScaleValue.isScalable();
6875 unsigned Scale = ScaleValue.getKnownMinValue();
6876 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6877
6878 const MachineOperand &ImmOpnd =
6879 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6880 Offset += ImmOpnd.getImm() * Scale;
6881
6882 // If the offset doesn't match the scale, we rewrite the instruction to
6883 // use the unscaled instruction instead. Likewise, if we have a negative
6884 // offset and there is an unscaled op to use.
6885 std::optional<unsigned> UnscaledOp =
6886 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
6887 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6888 if (useUnscaledOp &&
6889 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6890 MaxOff))
6891 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6892
6893 Scale = ScaleValue.getKnownMinValue();
6894 assert(IsMulVL == ScaleValue.isScalable() &&
6895 "Unscaled opcode has different value for scalable");
6896
6897 int64_t Remainder = Offset % Scale;
6898 assert(!(Remainder && useUnscaledOp) &&
6899 "Cannot have remainder when using unscaled op");
6900
6901 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6902 int64_t NewOffset = Offset / Scale;
6903 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6904 Offset = Remainder;
6905 else {
6906 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6907 Offset = Offset - (NewOffset * Scale);
6908 }
6909
6910 if (EmittableOffset)
6911 *EmittableOffset = NewOffset;
6912 if (OutUseUnscaledOp)
6913 *OutUseUnscaledOp = useUnscaledOp;
6914 if (OutUnscaledOp && UnscaledOp)
6915 *OutUnscaledOp = *UnscaledOp;
6916
6917 if (IsMulVL)
6918 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6919 else
6920 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6921 return AArch64FrameOffsetCanUpdate |
6922 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6923}
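// Worked example (hypothetical instruction): for an LDRXui whose combined
// byte offset comes to 20, the scaled form cannot encode it (20 is not a
// multiple of 8), so the unscaled LDURXi is selected, *EmittableOffset
// becomes 20, the remaining StackOffset is zero, and the result is
// AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal.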
6924
6926 unsigned FrameReg, StackOffset &Offset,
6927 const AArch64InstrInfo *TII) {
6928 unsigned Opcode = MI.getOpcode();
6929 unsigned ImmIdx = FrameRegIdx + 1;
6930
6931 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6932 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6933 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6934 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6935 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6936 MI.eraseFromParent();
6937 Offset = StackOffset();
6938 return true;
6939 }
6940
6941 int64_t NewOffset;
6942 unsigned UnscaledOp;
6943 bool UseUnscaledOp;
6944 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6945 &UnscaledOp, &NewOffset);
6948 // Replace the FrameIndex with FrameReg.
6949 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6950 if (UseUnscaledOp)
6951 MI.setDesc(TII->get(UnscaledOp));
6952
6953 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6954 return !Offset;
6955 }
6956
6957 return false;
6958}
6959
6965
6966MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); }
6967
6968// AArch64 supports MachineCombiner.
6969bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6970
6971 // True when Opc sets the condition flags (NZCV).
6972static bool isCombineInstrSettingFlag(unsigned Opc) {
6973 switch (Opc) {
6974 case AArch64::ADDSWrr:
6975 case AArch64::ADDSWri:
6976 case AArch64::ADDSXrr:
6977 case AArch64::ADDSXri:
6978 case AArch64::SUBSWrr:
6979 case AArch64::SUBSXrr:
6980 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6981 case AArch64::SUBSWri:
6982 case AArch64::SUBSXri:
6983 return true;
6984 default:
6985 break;
6986 }
6987 return false;
6988}
6989
6990// 32b Opcodes that can be combined with a MUL
6991static bool isCombineInstrCandidate32(unsigned Opc) {
6992 switch (Opc) {
6993 case AArch64::ADDWrr:
6994 case AArch64::ADDWri:
6995 case AArch64::SUBWrr:
6996 case AArch64::ADDSWrr:
6997 case AArch64::ADDSWri:
6998 case AArch64::SUBSWrr:
6999 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7000 case AArch64::SUBWri:
7001 case AArch64::SUBSWri:
7002 return true;
7003 default:
7004 break;
7005 }
7006 return false;
7007}
7008
7009// 64b Opcodes that can be combined with a MUL
7010static bool isCombineInstrCandidate64(unsigned Opc) {
7011 switch (Opc) {
7012 case AArch64::ADDXrr:
7013 case AArch64::ADDXri:
7014 case AArch64::SUBXrr:
7015 case AArch64::ADDSXrr:
7016 case AArch64::ADDSXri:
7017 case AArch64::SUBSXrr:
7018 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7019 case AArch64::SUBXri:
7020 case AArch64::SUBSXri:
7021 case AArch64::ADDv8i8:
7022 case AArch64::ADDv16i8:
7023 case AArch64::ADDv4i16:
7024 case AArch64::ADDv8i16:
7025 case AArch64::ADDv2i32:
7026 case AArch64::ADDv4i32:
7027 case AArch64::SUBv8i8:
7028 case AArch64::SUBv16i8:
7029 case AArch64::SUBv4i16:
7030 case AArch64::SUBv8i16:
7031 case AArch64::SUBv2i32:
7032 case AArch64::SUBv4i32:
7033 return true;
7034 default:
7035 break;
7036 }
7037 return false;
7038}
7039
7040// FP Opcodes that can be combined with a FMUL.
7041static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
7042 switch (Inst.getOpcode()) {
7043 default:
7044 break;
7045 case AArch64::FADDHrr:
7046 case AArch64::FADDSrr:
7047 case AArch64::FADDDrr:
7048 case AArch64::FADDv4f16:
7049 case AArch64::FADDv8f16:
7050 case AArch64::FADDv2f32:
7051 case AArch64::FADDv2f64:
7052 case AArch64::FADDv4f32:
7053 case AArch64::FSUBHrr:
7054 case AArch64::FSUBSrr:
7055 case AArch64::FSUBDrr:
7056 case AArch64::FSUBv4f16:
7057 case AArch64::FSUBv8f16:
7058 case AArch64::FSUBv2f32:
7059 case AArch64::FSUBv2f64:
7060 case AArch64::FSUBv4f32:
7062 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7063 // the target options or if FADD/FSUB has the contract fast-math flag.
7064 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7066 }
7067 return false;
7068}
7069
7070// Opcodes that can be combined with a MUL
7074
7075//
7076// Utility routine that checks if \param MO is defined by an
7077// \param CombineOpc instruction in the basic block \param MBB
7079 unsigned CombineOpc, unsigned ZeroReg = 0,
7080 bool CheckZeroReg = false) {
7081 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7082 MachineInstr *MI = nullptr;
7083
7084 if (MO.isReg() && MO.getReg().isVirtual())
7085 MI = MRI.getUniqueVRegDef(MO.getReg());
7086 // And it needs to be in the trace (otherwise, it won't have a depth).
7087 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7088 return false;
7089 // Must only be used by the user we combine with.
7090 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7091 return false;
7092
7093 if (CheckZeroReg) {
7094 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7095 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7096 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
7097 // The third input reg must be zero.
7098 if (MI->getOperand(3).getReg() != ZeroReg)
7099 return false;
7100 }
7101
7102 if (isCombineInstrSettingFlag(CombineOpc) &&
7103 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7104 return false;
7105
7106 return true;
7107}
7108
7109//
7110// Is \param MO defined by an integer multiply and can be combined?
7111static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7112 unsigned MulOpc, unsigned ZeroReg) {
7113 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7114}
7115
7116//
7117// Is \param MO defined by a floating-point multiply and can be combined?
7118static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7119 unsigned MulOpc) {
7120 return canCombine(MBB, MO, MulOpc);
7121}
7122
7123// TODO: There are many more machine instruction opcodes to match:
7124// 1. Other data types (integer, vectors)
7125// 2. Other math / logic operations (xor, or)
7126// 3. Other forms of the same operation (intrinsics and other variants)
7127bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7128 bool Invert) const {
7129 if (Invert)
7130 return false;
7131 switch (Inst.getOpcode()) {
7132 // == Floating-point types ==
7133 // -- Floating-point instructions --
7134 case AArch64::FADDHrr:
7135 case AArch64::FADDSrr:
7136 case AArch64::FADDDrr:
7137 case AArch64::FMULHrr:
7138 case AArch64::FMULSrr:
7139 case AArch64::FMULDrr:
7140 case AArch64::FMULX16:
7141 case AArch64::FMULX32:
7142 case AArch64::FMULX64:
7143 // -- Advanced SIMD instructions --
7144 case AArch64::FADDv4f16:
7145 case AArch64::FADDv8f16:
7146 case AArch64::FADDv2f32:
7147 case AArch64::FADDv4f32:
7148 case AArch64::FADDv2f64:
7149 case AArch64::FMULv4f16:
7150 case AArch64::FMULv8f16:
7151 case AArch64::FMULv2f32:
7152 case AArch64::FMULv4f32:
7153 case AArch64::FMULv2f64:
7154 case AArch64::FMULXv4f16:
7155 case AArch64::FMULXv8f16:
7156 case AArch64::FMULXv2f32:
7157 case AArch64::FMULXv4f32:
7158 case AArch64::FMULXv2f64:
7159 // -- SVE instructions --
7160 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7161 // in the SVE instruction set (though there are predicated ones).
7162 case AArch64::FADD_ZZZ_H:
7163 case AArch64::FADD_ZZZ_S:
7164 case AArch64::FADD_ZZZ_D:
7165 case AArch64::FMUL_ZZZ_H:
7166 case AArch64::FMUL_ZZZ_S:
7167 case AArch64::FMUL_ZZZ_D:
7168 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
7169 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
7170
7171 // == Integer types ==
7172 // -- Base instructions --
7173 // Opcodes MULWrr and MULXrr don't exist because
7174 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7175 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7176 // The machine-combiner does not support three-source-operand machine
7177 // instructions, so we cannot reassociate MULs.
7178 case AArch64::ADDWrr:
7179 case AArch64::ADDXrr:
7180 case AArch64::ANDWrr:
7181 case AArch64::ANDXrr:
7182 case AArch64::ORRWrr:
7183 case AArch64::ORRXrr:
7184 case AArch64::EORWrr:
7185 case AArch64::EORXrr:
7186 case AArch64::EONWrr:
7187 case AArch64::EONXrr:
7188 // -- Advanced SIMD instructions --
7189 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7190 // in the Advanced SIMD instruction set.
7191 case AArch64::ADDv8i8:
7192 case AArch64::ADDv16i8:
7193 case AArch64::ADDv4i16:
7194 case AArch64::ADDv8i16:
7195 case AArch64::ADDv2i32:
7196 case AArch64::ADDv4i32:
7197 case AArch64::ADDv1i64:
7198 case AArch64::ADDv2i64:
7199 case AArch64::MULv8i8:
7200 case AArch64::MULv16i8:
7201 case AArch64::MULv4i16:
7202 case AArch64::MULv8i16:
7203 case AArch64::MULv2i32:
7204 case AArch64::MULv4i32:
7205 case AArch64::ANDv8i8:
7206 case AArch64::ANDv16i8:
7207 case AArch64::ORRv8i8:
7208 case AArch64::ORRv16i8:
7209 case AArch64::EORv8i8:
7210 case AArch64::EORv16i8:
7211 // -- SVE instructions --
7212 case AArch64::ADD_ZZZ_B:
7213 case AArch64::ADD_ZZZ_H:
7214 case AArch64::ADD_ZZZ_S:
7215 case AArch64::ADD_ZZZ_D:
7216 case AArch64::MUL_ZZZ_B:
7217 case AArch64::MUL_ZZZ_H:
7218 case AArch64::MUL_ZZZ_S:
7219 case AArch64::MUL_ZZZ_D:
7220 case AArch64::AND_ZZZ:
7221 case AArch64::ORR_ZZZ:
7222 case AArch64::EOR_ZZZ:
7223 return true;
7224
7225 default:
7226 return false;
7227 }
7228}
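// These opcodes let the generic machine-combiner reassociate expression
// chains, e.g. rewriting ((a + b) + c) + d as (a + b) + (c + d) so the two
// inner additions can execute in parallel. Illustrative note only; the
// rewriting itself is performed by the target-independent combiner.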
7229
7230/// Find instructions that can be turned into madd.
7231static bool getMaddPatterns(MachineInstr &Root,
7232 SmallVectorImpl<unsigned> &Patterns) {
7233 unsigned Opc = Root.getOpcode();
7234 MachineBasicBlock &MBB = *Root.getParent();
7235 bool Found = false;
7236
7237 if (!isCombineInstrCandidate(Opc))
7238 return false;
7239 if (isCombineInstrSettingFlag(Opc)) {
7240 int Cmp_NZCV =
7241 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7242 // When NZCV is live, bail out.
7243 if (Cmp_NZCV == -1)
7244 return false;
7245 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7246 // When the opcode can't be changed, bail out.
7247 // CHECKME: do we miss any cases for opcode conversion?
7248 if (NewOpc == Opc)
7249 return false;
7250 Opc = NewOpc;
7251 }
7252
7253 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7254 unsigned Pattern) {
7255 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7256 Patterns.push_back(Pattern);
7257 Found = true;
7258 }
7259 };
7260
7261 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7262 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7263 Patterns.push_back(Pattern);
7264 Found = true;
7265 }
7266 };
7267
7268 typedef AArch64MachineCombinerPattern MCP;
7269
7270 switch (Opc) {
7271 default:
7272 break;
7273 case AArch64::ADDWrr:
7274 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7275 "ADDWrr does not have register operands");
7276 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7277 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7278 break;
7279 case AArch64::ADDXrr:
7280 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7281 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7282 break;
7283 case AArch64::SUBWrr:
7284 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7285 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7286 break;
7287 case AArch64::SUBXrr:
7288 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7289 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7290 break;
7291 case AArch64::ADDWri:
7292 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7293 break;
7294 case AArch64::ADDXri:
7295 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7296 break;
7297 case AArch64::SUBWri:
7298 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7299 break;
7300 case AArch64::SUBXri:
7301 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7302 break;
7303 case AArch64::ADDv8i8:
7304 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7305 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7306 break;
7307 case AArch64::ADDv16i8:
7308 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7309 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7310 break;
7311 case AArch64::ADDv4i16:
7312 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7313 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7314 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7315 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7316 break;
7317 case AArch64::ADDv8i16:
7318 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7319 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7320 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7321 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7322 break;
7323 case AArch64::ADDv2i32:
7324 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7325 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7326 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7327 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7328 break;
7329 case AArch64::ADDv4i32:
7330 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7331 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7332 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7333 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7334 break;
7335 case AArch64::SUBv8i8:
7336 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7337 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7338 break;
7339 case AArch64::SUBv16i8:
7340 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7341 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7342 break;
7343 case AArch64::SUBv4i16:
7344 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7345 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7346 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7347 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7348 break;
7349 case AArch64::SUBv8i16:
7350 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7351 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7352 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7353 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7354 break;
7355 case AArch64::SUBv2i32:
7356 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7357 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7358 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7359 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7360 break;
7361 case AArch64::SUBv4i32:
7362 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7363 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7364 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7365 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7366 break;
7367 }
7368 return Found;
7369}
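// Example of a matched integer pattern (register names are hypothetical):
//   %2:gpr32 = MADDWrrr %a, %b, $wzr     ; a plain MUL, encoded as MADD + WZR
//   %3:gpr32 = ADDWrr killed %2, %c      ; Root, recorded as MCP::MULADDW_OP1
// genAlternativeCodeSequence() can later rewrite the pair as
//   %3:gpr32 = MADDWrrr %a, %b, %c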
7370
7371bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7372 switch (Opcode) {
7373 default:
7374 break;
7375 case AArch64::UABALB_ZZZ_D:
7376 case AArch64::UABALB_ZZZ_H:
7377 case AArch64::UABALB_ZZZ_S:
7378 case AArch64::UABALT_ZZZ_D:
7379 case AArch64::UABALT_ZZZ_H:
7380 case AArch64::UABALT_ZZZ_S:
7381 case AArch64::SABALB_ZZZ_D:
7382 case AArch64::SABALB_ZZZ_S:
7383 case AArch64::SABALB_ZZZ_H:
7384 case AArch64::SABALT_ZZZ_D:
7385 case AArch64::SABALT_ZZZ_S:
7386 case AArch64::SABALT_ZZZ_H:
7387 case AArch64::UABALv16i8_v8i16:
7388 case AArch64::UABALv2i32_v2i64:
7389 case AArch64::UABALv4i16_v4i32:
7390 case AArch64::UABALv4i32_v2i64:
7391 case AArch64::UABALv8i16_v4i32:
7392 case AArch64::UABALv8i8_v8i16:
7393 case AArch64::UABAv16i8:
7394 case AArch64::UABAv2i32:
7395 case AArch64::UABAv4i16:
7396 case AArch64::UABAv4i32:
7397 case AArch64::UABAv8i16:
7398 case AArch64::UABAv8i8:
7399 case AArch64::SABALv16i8_v8i16:
7400 case AArch64::SABALv2i32_v2i64:
7401 case AArch64::SABALv4i16_v4i32:
7402 case AArch64::SABALv4i32_v2i64:
7403 case AArch64::SABALv8i16_v4i32:
7404 case AArch64::SABALv8i8_v8i16:
7405 case AArch64::SABAv16i8:
7406 case AArch64::SABAv2i32:
7407 case AArch64::SABAv4i16:
7408 case AArch64::SABAv4i32:
7409 case AArch64::SABAv8i16:
7410 case AArch64::SABAv8i8:
7411 return true;
7412 }
7413
7414 return false;
7415}
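// These opcodes mark absolute-difference accumulation chains (e.g. a loop
// body repeatedly feeding a UABALv8i8_v8i16 result into the next UABAL).
// The generic accumulator-chain optimization can split such a chain into
// independent partial accumulators, each started with the non-accumulating
// opcode returned by getAccumulationStartOpcode() and reduced at the end with
// the ADD returned by getReduceOpcodeForAccumulator(). Illustrative note only.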
7416
7417unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7418 unsigned AccumulationOpcode) const {
7419 switch (AccumulationOpcode) {
7420 default:
7421 llvm_unreachable("Unsupported accumulation Opcode!");
7422 case AArch64::UABALB_ZZZ_D:
7423 return AArch64::UABDLB_ZZZ_D;
7424 case AArch64::UABALB_ZZZ_H:
7425 return AArch64::UABDLB_ZZZ_H;
7426 case AArch64::UABALB_ZZZ_S:
7427 return AArch64::UABDLB_ZZZ_S;
7428 case AArch64::UABALT_ZZZ_D:
7429 return AArch64::UABDLT_ZZZ_D;
7430 case AArch64::UABALT_ZZZ_H:
7431 return AArch64::UABDLT_ZZZ_H;
7432 case AArch64::UABALT_ZZZ_S:
7433 return AArch64::UABDLT_ZZZ_S;
7434 case AArch64::UABALv16i8_v8i16:
7435 return AArch64::UABDLv16i8_v8i16;
7436 case AArch64::UABALv2i32_v2i64:
7437 return AArch64::UABDLv2i32_v2i64;
7438 case AArch64::UABALv4i16_v4i32:
7439 return AArch64::UABDLv4i16_v4i32;
7440 case AArch64::UABALv4i32_v2i64:
7441 return AArch64::UABDLv4i32_v2i64;
7442 case AArch64::UABALv8i16_v4i32:
7443 return AArch64::UABDLv8i16_v4i32;
7444 case AArch64::UABALv8i8_v8i16:
7445 return AArch64::UABDLv8i8_v8i16;
7446 case AArch64::UABAv16i8:
7447 return AArch64::UABDv16i8;
7448 case AArch64::UABAv2i32:
7449 return AArch64::UABDv2i32;
7450 case AArch64::UABAv4i16:
7451 return AArch64::UABDv4i16;
7452 case AArch64::UABAv4i32:
7453 return AArch64::UABDv4i32;
7454 case AArch64::UABAv8i16:
7455 return AArch64::UABDv8i16;
7456 case AArch64::UABAv8i8:
7457 return AArch64::UABDv8i8;
7458 case AArch64::SABALB_ZZZ_D:
7459 return AArch64::SABDLB_ZZZ_D;
7460 case AArch64::SABALB_ZZZ_S:
7461 return AArch64::SABDLB_ZZZ_S;
7462 case AArch64::SABALB_ZZZ_H:
7463 return AArch64::SABDLB_ZZZ_H;
7464 case AArch64::SABALT_ZZZ_D:
7465 return AArch64::SABDLT_ZZZ_D;
7466 case AArch64::SABALT_ZZZ_S:
7467 return AArch64::SABDLT_ZZZ_S;
7468 case AArch64::SABALT_ZZZ_H:
7469 return AArch64::SABDLT_ZZZ_H;
7470 case AArch64::SABALv16i8_v8i16:
7471 return AArch64::SABDLv16i8_v8i16;
7472 case AArch64::SABALv2i32_v2i64:
7473 return AArch64::SABDLv2i32_v2i64;
7474 case AArch64::SABALv4i16_v4i32:
7475 return AArch64::SABDLv4i16_v4i32;
7476 case AArch64::SABALv4i32_v2i64:
7477 return AArch64::SABDLv4i32_v2i64;
7478 case AArch64::SABALv8i16_v4i32:
7479 return AArch64::SABDLv8i16_v4i32;
7480 case AArch64::SABALv8i8_v8i16:
7481 return AArch64::SABDLv8i8_v8i16;
7482 case AArch64::SABAv16i8:
7483 return AArch64::SABDv16i8;
7484 case AArch64::SABAv2i32:
7485 return AArch64::SABDv2i32;
7486 case AArch64::SABAv4i16:
7487 return AArch64::SABDv4i16;
7488 case AArch64::SABAv4i32:
7489 return AArch64::SABDv4i32;
7490 case AArch64::SABAv8i16:
7491 return AArch64::SABDv8i16;
7492 case AArch64::SABAv8i8:
7493 return AArch64::SABDv8i8;
7494 }
7495}
7496
7497/// Floating-Point Support
7498
7499/// Find instructions that can be turned into madd.
7500static bool getFMAPatterns(MachineInstr &Root,
7501 SmallVectorImpl<unsigned> &Patterns) {
7502
7503 if (!isCombineInstrCandidateFP(Root))
7504 return false;
7505
7506 MachineBasicBlock &MBB = *Root.getParent();
7507 bool Found = false;
7508
7509 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7510 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7511 Patterns.push_back(Pattern);
7512 return true;
7513 }
7514 return false;
7515 };
7516
7517 typedef AArch64MachineCombinerPattern MCP;
7518
7519 switch (Root.getOpcode()) {
7520 default:
7521 assert(false && "Unsupported FP instruction in combiner\n");
7522 break;
7523 case AArch64::FADDHrr:
7524 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7525 "FADDHrr does not have register operands");
7526
7527 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7528 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7529 break;
7530 case AArch64::FADDSrr:
7531 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7532 "FADDSrr does not have register operands");
7533
7534 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7535 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7536
7537 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7538 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7539 break;
7540 case AArch64::FADDDrr:
7541 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7542 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7543
7544 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7545 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7546 break;
7547 case AArch64::FADDv4f16:
7548 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7549 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7550
7551 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7552 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7553 break;
7554 case AArch64::FADDv8f16:
7555 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7556 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7557
7558 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7559 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7560 break;
7561 case AArch64::FADDv2f32:
7562 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7563 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7564
7565 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7566 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7567 break;
7568 case AArch64::FADDv2f64:
7569 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7570 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7571
7572 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7573 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7574 break;
7575 case AArch64::FADDv4f32:
7576 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7577 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7578
7579 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7580 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7581 break;
7582 case AArch64::FSUBHrr:
7583 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7584 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7585 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7586 break;
7587 case AArch64::FSUBSrr:
7588 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7589
7590 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7591 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7592
7593 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7594 break;
7595 case AArch64::FSUBDrr:
7596 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7597
7598 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7599 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7600
7601 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7602 break;
7603 case AArch64::FSUBv4f16:
7604 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7605 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7606
7607 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7608 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7609 break;
7610 case AArch64::FSUBv8f16:
7611 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7612 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7613
7614 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7615 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7616 break;
7617 case AArch64::FSUBv2f32:
7618 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7619 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7620
7621 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7622 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7623 break;
7624 case AArch64::FSUBv2f64:
7625 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7626 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7627
7628 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7629 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7630 break;
7631 case AArch64::FSUBv4f32:
7632 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7633 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7634
7635 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7636 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7637 break;
7638 }
7639 return Found;
7640}
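// The _OP1/_OP2 suffix in the pattern name records which Root operand is the
// FMUL result, e.g. (hypothetical registers):
//   %3 = FADDSrr %mul, %acc   -> FMULADDS_OP1 (operand 1 is the FMUL result)
//   %3 = FADDSrr %acc, %mul   -> FMULADDS_OP2 (operand 2 is the FMUL result)
// The generator uses this index to pick the accumulator operand when forming
// the fused multiply-add.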
7641
7642static bool getFMULPatterns(MachineInstr &Root,
7643 SmallVectorImpl<unsigned> &Patterns) {
7644 MachineBasicBlock &MBB = *Root.getParent();
7645 bool Found = false;
7646
7647 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7648 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7649 MachineOperand &MO = Root.getOperand(Operand);
7650 MachineInstr *MI = nullptr;
7651 if (MO.isReg() && MO.getReg().isVirtual())
7652 MI = MRI.getUniqueVRegDef(MO.getReg());
7653 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7654 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7655 MI->getOperand(1).getReg().isVirtual())
7656 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7657 if (MI && MI->getOpcode() == Opcode) {
7658 Patterns.push_back(Pattern);
7659 return true;
7660 }
7661 return false;
7662 };
7663
7664 typedef AArch64MachineCombinerPattern MCP;
7665
7666 switch (Root.getOpcode()) {
7667 default:
7668 return false;
7669 case AArch64::FMULv2f32:
7670 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7671 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7672 break;
7673 case AArch64::FMULv2f64:
7674 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7675 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7676 break;
7677 case AArch64::FMULv4f16:
7678 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7679 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7680 break;
7681 case AArch64::FMULv4f32:
7682 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7683 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7684 break;
7685 case AArch64::FMULv8f16:
7686 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7687 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7688 break;
7689 }
7690
7691 return Found;
7692}
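// Example (hypothetical registers): a multiply by a duplicated vector lane
//   %d:fpr128 = DUPv4i32lane %v, 1
//   %r:fpr128 = FMULv4f32 %x, killed %d        ; FMULv4i32_indexed_OP2
// can be rewritten by genIndexedMultiply() below into
//   %r:fpr128 = FMULv4i32_indexed %x, %v, 1
// removing the standalone DUP.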
7693
7694static bool getFNEGPatterns(MachineInstr &Root,
7695 SmallVectorImpl<unsigned> &Patterns) {
7696 unsigned Opc = Root.getOpcode();
7697 MachineBasicBlock &MBB = *Root.getParent();
7698 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7699
7700 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7701 MachineOperand &MO = Root.getOperand(1);
7702 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7703 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7704 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7705 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
7706 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
7707 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
7708 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7709 Patterns.push_back(Pattern);
7710 return true;
7711 }
7712 return false;
7713 };
7714
7715 switch (Opc) {
7716 default:
7717 break;
7718 case AArch64::FNEGDr:
7719 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7720 case AArch64::FNEGSr:
7721 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7722 }
7723
7724 return false;
7725}
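// Example (hypothetical registers, fast-math flags assumed on both
// instructions):
//   %2:fpr64 = contract nsz FMADDDrrr %a, %b, %c
//   %3:fpr64 = contract nsz FNEGDr killed %2     ; Root, FNMADD pattern
// is later replaced by a single negated fused multiply-add:
//   %3:fpr64 = FNMADDDrrr %a, %b, %c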
7726
7727/// Return true when a code sequence can improve throughput. It
7728/// should be called only for instructions in loops.
7729/// \param Pattern - combiner pattern
7731 switch (Pattern) {
7732 default:
7733 break;
7839 return true;
7840 } // end switch (Pattern)
7841 return false;
7842}
7843
7844/// Find other MI combine patterns.
7845static bool getMiscPatterns(MachineInstr &Root,
7846 SmallVectorImpl<unsigned> &Patterns) {
7847 // A - (B + C) ==> (A - B) - C or (A - C) - B
7848 unsigned Opc = Root.getOpcode();
7849 MachineBasicBlock &MBB = *Root.getParent();
7850
7851 switch (Opc) {
7852 case AArch64::SUBWrr:
7853 case AArch64::SUBSWrr:
7854 case AArch64::SUBXrr:
7855 case AArch64::SUBSXrr:
7856 // Found candidate root.
7857 break;
7858 default:
7859 return false;
7860 }
7861
7862 if (isCombineInstrSettingFlag(Opc) &&
7863 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7864 -1)
7865 return false;
7866
7867 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7868 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7869 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7870 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7871 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7872 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7873 return true;
7874 }
7875
7876 return false;
7877}
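// Example (hypothetical registers):
//   %1:gpr64 = ADDXrr %b, %c
//   %2:gpr64 = SUBXrr %a, killed %1        ; Root
// genSubAdd2SubSub() below can rewrite this as (%a - %b) - %c (SUBADD_OP1) or
// (%a - %c) - %b (SUBADD_OP2), which helps when %b and %c become available at
// different times.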
7878
7879/// Check if the given instruction forms a gather load pattern that can be
7880/// optimized for better Memory-Level Parallelism (MLP). This function
7881/// identifies chains of NEON lane load instructions that load data from
7882/// different memory addresses into individual lanes of a 128-bit vector
7883/// register, then attempts to split the pattern into parallel loads to break
7884/// the serial dependency between instructions.
7885///
7886/// Pattern Matched:
7887/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7888/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7889///
7890/// Transformed Into:
7891/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7892/// to combine the results, enabling better memory-level parallelism.
7893///
7894/// Supported Element Types:
7895/// - 32-bit elements (LD1i32, 4 lanes total)
7896/// - 16-bit elements (LD1i16, 8 lanes total)
7897/// - 8-bit elements (LD1i8, 16 lanes total)
7898static bool getGatherLanePattern(MachineInstr &Root,
7899 SmallVectorImpl<unsigned> &Patterns,
7900 unsigned LoadLaneOpCode, unsigned NumLanes) {
7901 const MachineFunction *MF = Root.getMF();
7902
7903 // Early exit if optimizing for size.
7904 if (MF->getFunction().hasMinSize())
7905 return false;
7906
7907 const MachineRegisterInfo &MRI = MF->getRegInfo();
7908 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7909
7910 // The root of the pattern must load into the last lane of the vector.
7911 if (Root.getOperand(2).getImm() != NumLanes - 1)
7912 return false;
7913
7914 // Check that we have loads into all lanes except lane 0.
7915 // For each load we also want to check that:
7916 // 1. It has a single non-debug use (since we will be replacing the virtual
7917 // register)
7918 // 2. That the addressing mode only uses a single pointer operand
7919 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7920 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7921 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7922 SmallVector<const MachineInstr *, 16> LoadInstrs = {};
7923 while (!RemainingLanes.empty() && CurrInstr &&
7924 CurrInstr->getOpcode() == LoadLaneOpCode &&
7925 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7926 CurrInstr->getNumOperands() == 4) {
7927 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7928 LoadInstrs.push_back(CurrInstr);
7929 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7930 }
7931
7932 // Check that we have found a match for lanes N-1.. 1.
7933 if (!RemainingLanes.empty())
7934 return false;
7935
7936 // Match the SUBREG_TO_REG sequence.
7937 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7938 return false;
7939
7940 // Verify that the subreg to reg loads an integer into the first lane.
7941 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7942 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7943 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7944 return false;
7945
7946 // Verify that it also has a single non debug use.
7947 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7948 return false;
7949
7950 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7951
7952 // If there is any chance of aliasing, do not apply the pattern.
7953 // Walk backward through the MBB starting from Root.
7954 // Exit early if we've encountered all load instructions or hit the search
7955 // limit.
7956 auto MBBItr = Root.getIterator();
7957 unsigned RemainingSteps = GatherOptSearchLimit;
7958 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7959 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7960 const MachineBasicBlock *MBB = Root.getParent();
7961
7962 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7963 !RemainingLoadInstrs.empty();
7964 --MBBItr, --RemainingSteps) {
7965 const MachineInstr &CurrInstr = *MBBItr;
7966
7967 // Remove this instruction from remaining loads if it's one we're tracking.
7968 RemainingLoadInstrs.erase(&CurrInstr);
7969
7970 // Check for potential aliasing with any of the load instructions to
7971 // optimize.
7972 if (CurrInstr.isLoadFoldBarrier())
7973 return false;
7974 }
7975
7976 // If we hit the search limit without finding all load instructions,
7977 // don't match the pattern.
7978 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7979 return false;
7980
7981 switch (NumLanes) {
7982 case 4:
7983 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
7984 break;
7985 case 8:
7986 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
7987 break;
7988 case 16:
7989 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
7990 break;
7991 default:
7992 llvm_unreachable("Got bad number of lanes for gather pattern.");
7993 }
7994
7995 return true;
7996}
7997
7998/// Search for patterns of LD instructions we can optimize.
7999static bool getLoadPatterns(MachineInstr &Root,
8000 SmallVectorImpl<unsigned> &Patterns) {
8001
8002 // The pattern searches for loads into single lanes.
8003 switch (Root.getOpcode()) {
8004 case AArch64::LD1i32:
8005 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
8006 case AArch64::LD1i16:
8007 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
8008 case AArch64::LD1i8:
8009 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
8010 default:
8011 return false;
8012 }
8013}
8014
8015/// Generate optimized instruction sequence for gather load patterns to improve
8016/// Memory-Level Parallelism (MLP). This function transforms a chain of
8017/// sequential NEON lane loads into parallel vector loads that can execute
8018/// concurrently.
8019static void
8023 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8024 unsigned Pattern, unsigned NumLanes) {
8025 MachineFunction &MF = *Root.getParent()->getParent();
8026 MachineRegisterInfo &MRI = MF.getRegInfo();
8027 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8028
8029 // Gather the initial load instructions to build the pattern.
8030 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
8031 MachineInstr *CurrInstr = &Root;
8032 for (unsigned i = 0; i < NumLanes - 1; ++i) {
8033 LoadToLaneInstrs.push_back(CurrInstr);
8034 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8035 }
8036
8037 // Sort the load instructions according to the lane.
8038 llvm::sort(LoadToLaneInstrs,
8039 [](const MachineInstr *A, const MachineInstr *B) {
8040 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
8041 });
8042
8043 MachineInstr *SubregToReg = CurrInstr;
8044 LoadToLaneInstrs.push_back(
8045 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
8046 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
8047
8048 const TargetRegisterClass *FPR128RegClass =
8049 MRI.getRegClass(Root.getOperand(0).getReg());
8050
8051 // Helper lambda to create a LD1 instruction.
8052 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
8053 Register SrcRegister, unsigned Lane,
8054 Register OffsetRegister,
8055 bool OffsetRegisterKillState) {
8056 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8057 MachineInstrBuilder LoadIndexIntoRegister =
8058 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8059 NewRegister)
8060 .addReg(SrcRegister)
8061 .addImm(Lane)
8062 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
8063 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8064 InsInstrs.push_back(LoadIndexIntoRegister);
8065 return NewRegister;
8066 };
8067
8068 // Helper to create load instruction based on the NumLanes in the NEON
8069 // register we are rewriting.
8070 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
8071 Register OffsetReg,
8072 bool KillState) -> MachineInstrBuilder {
8073 unsigned Opcode;
8074 switch (NumLanes) {
8075 case 4:
8076 Opcode = AArch64::LDRSui;
8077 break;
8078 case 8:
8079 Opcode = AArch64::LDRHui;
8080 break;
8081 case 16:
8082 Opcode = AArch64::LDRBui;
8083 break;
8084 default:
8086 "Got unsupported number of lanes in machine-combiner gather pattern");
8087 }
8088 // Immediate offset load
8089 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8090 .addReg(OffsetReg)
8091 .addImm(0);
8092 };
8093
8094 // Load the remaining lanes into register 0.
8095 auto LanesToLoadToReg0 =
8096 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8097 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8098 Register PrevReg = SubregToReg->getOperand(0).getReg();
8099 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8100 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8101 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8102 OffsetRegOperand.getReg(),
8103 OffsetRegOperand.isKill());
8104 DelInstrs.push_back(LoadInstr);
8105 }
8106 Register LastLoadReg0 = PrevReg;
8107
8108 // First load into register 1. Perform an integer load to zero out the upper
8109 // lanes in a single instruction.
8110 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
8111 MachineInstr *OriginalSplitLoad =
8112 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8113 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8114 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8115
8116 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8117 OriginalSplitLoad->getOperand(3);
8118 MachineInstrBuilder MiddleIndexLoadInstr =
8119 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8120 OriginalSplitToLoadOffsetOperand.getReg(),
8121 OriginalSplitToLoadOffsetOperand.isKill());
8122
8123 InstrIdxForVirtReg.insert(
8124 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8125 InsInstrs.push_back(MiddleIndexLoadInstr);
8126 DelInstrs.push_back(OriginalSplitLoad);
8127
8128 // Subreg To Reg instruction for register 1.
8129 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8130 unsigned SubregType;
8131 switch (NumLanes) {
8132 case 4:
8133 SubregType = AArch64::ssub;
8134 break;
8135 case 8:
8136 SubregType = AArch64::hsub;
8137 break;
8138 case 16:
8139 SubregType = AArch64::bsub;
8140 break;
8141 default:
8143 "Got invalid NumLanes for machine-combiner gather pattern");
8144 }
8145
8146 auto SubRegToRegInstr =
8147 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8148 DestRegForSubregToReg)
8149 .addImm(0)
8150 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8151 .addImm(SubregType);
8152 InstrIdxForVirtReg.insert(
8153 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8154 InsInstrs.push_back(SubRegToRegInstr);
8155
8156 // Load remaining lanes into register 1.
8157 auto LanesToLoadToReg1 =
8158 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8159 LoadToLaneInstrsAscending.end());
8160 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8161 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8162 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8163 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8164 OffsetRegOperand.getReg(),
8165 OffsetRegOperand.isKill());
8166
8167 // Do not add the last reg to DelInstrs - it will be removed later.
8168 if (Index == NumLanes / 2 - 2) {
8169 break;
8170 }
8171 DelInstrs.push_back(LoadInstr);
8172 }
8173 Register LastLoadReg1 = PrevReg;
8174
8175 // Create the final zip instruction to combine the results.
8176 MachineInstrBuilder ZipInstr =
8177 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8178 Root.getOperand(0).getReg())
8179 .addReg(LastLoadReg0)
8180 .addReg(LastLoadReg1);
8181 InsInstrs.push_back(ZipInstr);
8182}
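// For a 4-lane (32-bit element) gather, the emitted sequence looks roughly
// like this (hypothetical registers; the offset registers come from the
// original LD1i32 chain):
//   %q0 = LD1i32 %q0_init, 1, %ptr1          ; fill lanes 1.. of register 0
//   %s  = LDRSui %ptr2, 0                    ; scalar load for register 1
//   %q1 = SUBREG_TO_REG 0, killed %s, %subreg.ssub
//   %q1 = LD1i32 %q1, 1, %ptr3               ; fill lanes 1.. of register 1
//   %res = ZIP1v2i64 %q0, %q1                ; combine the two halves
// so the two half-vectors can be filled in parallel.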
8183
8197
8198/// Return true when there is potentially a faster code sequence for an
8199/// instruction chain ending in \p Root. All potential patterns are listed in
8200/// the \p Pattern vector. Pattern should be sorted in priority order since the
8201/// pattern evaluator stops checking as soon as it finds a faster sequence.
8202
8203bool AArch64InstrInfo::getMachineCombinerPatterns(
8204 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8205 bool DoRegPressureReduce) const {
8206 // Integer patterns
8207 if (getMaddPatterns(Root, Patterns))
8208 return true;
8209 // Floating point patterns
8210 if (getFMULPatterns(Root, Patterns))
8211 return true;
8212 if (getFMAPatterns(Root, Patterns))
8213 return true;
8214 if (getFNEGPatterns(Root, Patterns))
8215 return true;
8216
8217 // Other patterns
8218 if (getMiscPatterns(Root, Patterns))
8219 return true;
8220
8221 // Load patterns
8222 if (getLoadPatterns(Root, Patterns))
8223 return true;
8224
8225 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8226 DoRegPressureReduce);
8227}
8228
8229enum class FMAInstKind { Default, Indexed, Accumulator };
8230/// genFusedMultiply - Generate fused multiply instructions.
8231/// This function supports both integer and floating point instructions.
8232/// A typical example:
8233/// F|MUL I=A,B,0
8234/// F|ADD R,I,C
8235/// ==> F|MADD R,A,B,C
8236/// \param MF Containing MachineFunction
8237/// \param MRI Register information
8238/// \param TII Target information
8239/// \param Root is the F|ADD instruction
8240/// \param [out] InsInstrs is a vector of machine instructions and will
8241/// contain the generated madd instruction
8242/// \param IdxMulOpd is index of operand in Root that is the result of
8243/// the F|MUL. In the example above IdxMulOpd is 1.
8244/// \param MaddOpc the opcode of the f|madd instruction
8245/// \param RC Register class of operands
8246/// \param kind of fma instruction (addressing mode) to be generated
8247/// \param ReplacedAddend is the result register from the instruction
8248/// replacing the non-combined operand, if any.
8249static MachineInstr *
8250genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
8251 const TargetInstrInfo *TII, MachineInstr &Root,
8252 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8253 unsigned MaddOpc, const TargetRegisterClass *RC,
8254 FMAInstKind kind = FMAInstKind::Default,
8255 const Register *ReplacedAddend = nullptr) {
8256 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8257
8258 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8259 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8260 Register ResultReg = Root.getOperand(0).getReg();
8261 Register SrcReg0 = MUL->getOperand(1).getReg();
8262 bool Src0IsKill = MUL->getOperand(1).isKill();
8263 Register SrcReg1 = MUL->getOperand(2).getReg();
8264 bool Src1IsKill = MUL->getOperand(2).isKill();
8265
8266 Register SrcReg2;
8267 bool Src2IsKill;
8268 if (ReplacedAddend) {
8269 // If we just generated a new addend, we must be its only use.
8270 SrcReg2 = *ReplacedAddend;
8271 Src2IsKill = true;
8272 } else {
8273 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8274 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8275 }
8276
8277 if (ResultReg.isVirtual())
8278 MRI.constrainRegClass(ResultReg, RC);
8279 if (SrcReg0.isVirtual())
8280 MRI.constrainRegClass(SrcReg0, RC);
8281 if (SrcReg1.isVirtual())
8282 MRI.constrainRegClass(SrcReg1, RC);
8283 if (SrcReg2.isVirtual())
8284 MRI.constrainRegClass(SrcReg2, RC);
8285
8286 MachineInstrBuilder MIB;
8287 if (kind == FMAInstKind::Default)
8288 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8289 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8290 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8291 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8292 else if (kind == FMAInstKind::Indexed)
8293 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8294 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8295 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8296 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8297 .addImm(MUL->getOperand(3).getImm());
8298 else if (kind == FMAInstKind::Accumulator)
8299 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8300 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8301 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8302 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8303 else
8304 assert(false && "Invalid FMA instruction kind \n");
8305 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
8306 InsInstrs.push_back(MIB);
8307 return MUL;
8308}
8309
8310static MachineInstr *
8311genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
8312 const TargetInstrInfo *TII, MachineInstr &Root,
8313 SmallVectorImpl<MachineInstr *> &InsInstrs) {
8314 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8315
8316 unsigned Opc = 0;
8317 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8318 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8319 Opc = AArch64::FNMADDSrrr;
8320 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8321 Opc = AArch64::FNMADDDrrr;
8322 else
8323 return nullptr;
8324
8325 Register ResultReg = Root.getOperand(0).getReg();
8326 Register SrcReg0 = MAD->getOperand(1).getReg();
8327 Register SrcReg1 = MAD->getOperand(2).getReg();
8328 Register SrcReg2 = MAD->getOperand(3).getReg();
8329 bool Src0IsKill = MAD->getOperand(1).isKill();
8330 bool Src1IsKill = MAD->getOperand(2).isKill();
8331 bool Src2IsKill = MAD->getOperand(3).isKill();
8332 if (ResultReg.isVirtual())
8333 MRI.constrainRegClass(ResultReg, RC);
8334 if (SrcReg0.isVirtual())
8335 MRI.constrainRegClass(SrcReg0, RC);
8336 if (SrcReg1.isVirtual())
8337 MRI.constrainRegClass(SrcReg1, RC);
8338 if (SrcReg2.isVirtual())
8339 MRI.constrainRegClass(SrcReg2, RC);
8340
8341 MachineInstrBuilder MIB =
8342 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8343 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8344 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8345 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8346 InsInstrs.push_back(MIB);
8347
8348 return MAD;
8349}
8350
8351/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8352static MachineInstr *
8353genIndexedMultiply(MachineInstr &Root,
8354 SmallVectorImpl<MachineInstr *> &InsInstrs,
8355 unsigned IdxDupOp, unsigned MulOpc,
8356 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8357 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8358 "Invalid index of FMUL operand");
8359
8360 MachineFunction &MF = *Root.getMF();
8361 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8362
8363 MachineInstr *Dup =
8364 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8365
8366 if (Dup->getOpcode() == TargetOpcode::COPY)
8367 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8368
8369 Register DupSrcReg = Dup->getOperand(1).getReg();
8370 MRI.clearKillFlags(DupSrcReg);
8371 MRI.constrainRegClass(DupSrcReg, RC);
8372
8373 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8374
8375 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8376 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8377
8378 Register ResultReg = Root.getOperand(0).getReg();
8379
8381 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8382 .add(MulOp)
8383 .addReg(DupSrcReg)
8384 .addImm(DupSrcLane);
8385
8386 InsInstrs.push_back(MIB);
8387 return &Root;
8388}
8389
8390/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8391/// instructions.
8392///
8393/// \see genFusedMultiply
8394static MachineInstr *genFusedMultiplyAcc(
8395 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8396 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8397 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8398 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8400}
8401
8402/// genNeg - Helper to generate an intermediate negation of the second operand
8403/// of Root
8404static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8405 const TargetInstrInfo *TII, MachineInstr &Root,
8406 SmallVectorImpl<MachineInstr *> &InsInstrs,
8407 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8408 unsigned MnegOpc, const TargetRegisterClass *RC) {
8409 Register NewVR = MRI.createVirtualRegister(RC);
8410 MachineInstrBuilder MIB =
8411 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8412 .add(Root.getOperand(2));
8413 InsInstrs.push_back(MIB);
8414
8415 assert(InstrIdxForVirtReg.empty());
8416 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8417
8418 return NewVR;
8419}
8420
8421/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8422/// instructions with an additional negation of the accumulator
8423static MachineInstr *genFusedMultiplyAccNeg(
8424 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8425 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8426 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8427 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8428 assert(IdxMulOpd == 1);
8429
8430 Register NewVR =
8431 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8432 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8433 FMAInstKind::Accumulator, &NewVR);
8434}
8435
8436/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8437/// instructions.
8438///
8439/// \see genFusedMultiply
8440static MachineInstr *genFusedMultiplyIdx(
8441 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8442 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8443 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8444 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8446}
8447
8448/// genFusedMultiplyIdxNeg - Helper to generate fused multiply (indexed)
8449/// instructions with an additional negation of the accumulator
8450static MachineInstr *genFusedMultiplyIdxNeg(
8451 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8452 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8453 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8454 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8455 assert(IdxMulOpd == 1);
8456
8457 Register NewVR =
8458 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8459
8460 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8461 FMAInstKind::Indexed, &NewVR);
8462}
8463
8464/// genMaddR - Generate madd instruction and combine mul and add using
8465/// an extra virtual register
8466/// Example - an ADD intermediate needs to be stored in a register:
8467/// MUL I=A,B,0
8468/// ADD R,I,Imm
8469/// ==> ORR V, ZR, Imm
8470/// ==> MADD R,A,B,V
8471/// \param MF Containing MachineFunction
8472/// \param MRI Register information
8473/// \param TII Target information
8474/// \param Root is the ADD instruction
8475/// \param [out] InsInstrs is a vector of machine instructions and will
8476/// contain the generated madd instruction
8477/// \param IdxMulOpd is index of operand in Root that is the result of
8478/// the MUL. In the example above IdxMulOpd is 1.
8479/// \param MaddOpc the opcode of the madd instruction
8480/// \param VR is a virtual register that holds the value of an ADD operand
8481/// (V in the example above).
8482/// \param RC Register class of operands
8483static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8484 const TargetInstrInfo *TII, MachineInstr &Root,
8485 SmallVectorImpl<MachineInstr *> &InsInstrs,
8486 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8487 const TargetRegisterClass *RC) {
8488 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8489
8490 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8491 Register ResultReg = Root.getOperand(0).getReg();
8492 Register SrcReg0 = MUL->getOperand(1).getReg();
8493 bool Src0IsKill = MUL->getOperand(1).isKill();
8494 Register SrcReg1 = MUL->getOperand(2).getReg();
8495 bool Src1IsKill = MUL->getOperand(2).isKill();
8496
8497 if (ResultReg.isVirtual())
8498 MRI.constrainRegClass(ResultReg, RC);
8499 if (SrcReg0.isVirtual())
8500 MRI.constrainRegClass(SrcReg0, RC);
8501 if (SrcReg1.isVirtual())
8502 MRI.constrainRegClass(SrcReg1, RC);
8503 if (Register(VR).isVirtual())
8504 MRI.constrainRegClass(VR, RC);
8505
8506 MachineInstrBuilder MIB =
8507 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8508 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8509 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8510 .addReg(VR);
8511 // Insert the MADD
8512 InsInstrs.push_back(MIB);
8513 return MUL;
8514}
8515
8516/// Do the following transformation
8517/// A - (B + C) ==> (A - B) - C
8518/// A - (B + C) ==> (A - C) - B
8519static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8520 const TargetInstrInfo *TII, MachineInstr &Root,
8521 SmallVectorImpl<MachineInstr *> &InsInstrs,
8522 SmallVectorImpl<MachineInstr *> &DelInstrs,
8523 unsigned IdxOpd1,
8524 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8525 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8526 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8527 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8528
8529 Register ResultReg = Root.getOperand(0).getReg();
8530 Register RegA = Root.getOperand(1).getReg();
8531 bool RegAIsKill = Root.getOperand(1).isKill();
8532 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8533 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8534 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8535 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8536 Register NewVR =
8537 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8538
8539 unsigned Opcode = Root.getOpcode();
8540 if (Opcode == AArch64::SUBSWrr)
8541 Opcode = AArch64::SUBWrr;
8542 else if (Opcode == AArch64::SUBSXrr)
8543 Opcode = AArch64::SUBXrr;
8544 else
8545 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8546 "Unexpected instruction opcode.");
8547
8548 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8549 Flags &= ~MachineInstr::NoSWrap;
8550 Flags &= ~MachineInstr::NoUWrap;
8551
8552 MachineInstrBuilder MIB1 =
8553 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8554 .addReg(RegA, getKillRegState(RegAIsKill))
8555 .addReg(RegB, getKillRegState(RegBIsKill))
8556 .setMIFlags(Flags);
8557 MachineInstrBuilder MIB2 =
8558 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8559 .addReg(NewVR, getKillRegState(true))
8560 .addReg(RegC, getKillRegState(RegCIsKill))
8561 .setMIFlags(Flags);
8562
8563 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8564 InsInstrs.push_back(MIB1);
8565 InsInstrs.push_back(MIB2);
8566 DelInstrs.push_back(AddMI);
8567 DelInstrs.push_back(&Root);
8568}
8569
8570unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8571 unsigned int AccumulatorOpCode) const {
8572 switch (AccumulatorOpCode) {
8573 case AArch64::UABALB_ZZZ_D:
8574 case AArch64::SABALB_ZZZ_D:
8575 case AArch64::UABALT_ZZZ_D:
8576 case AArch64::SABALT_ZZZ_D:
8577 return AArch64::ADD_ZZZ_D;
8578 case AArch64::UABALB_ZZZ_H:
8579 case AArch64::SABALB_ZZZ_H:
8580 case AArch64::UABALT_ZZZ_H:
8581 case AArch64::SABALT_ZZZ_H:
8582 return AArch64::ADD_ZZZ_H;
8583 case AArch64::UABALB_ZZZ_S:
8584 case AArch64::SABALB_ZZZ_S:
8585 case AArch64::UABALT_ZZZ_S:
8586 case AArch64::SABALT_ZZZ_S:
8587 return AArch64::ADD_ZZZ_S;
8588 case AArch64::UABALv16i8_v8i16:
8589 case AArch64::SABALv8i8_v8i16:
8590 case AArch64::SABAv8i16:
8591 case AArch64::UABAv8i16:
8592 return AArch64::ADDv8i16;
8593 case AArch64::SABALv2i32_v2i64:
8594 case AArch64::UABALv2i32_v2i64:
8595 case AArch64::SABALv4i32_v2i64:
8596 return AArch64::ADDv2i64;
8597 case AArch64::UABALv4i16_v4i32:
8598 case AArch64::SABALv4i16_v4i32:
8599 case AArch64::SABALv8i16_v4i32:
8600 case AArch64::SABAv4i32:
8601 case AArch64::UABAv4i32:
8602 return AArch64::ADDv4i32;
8603 case AArch64::UABALv4i32_v2i64:
8604 return AArch64::ADDv2i64;
8605 case AArch64::UABALv8i16_v4i32:
8606 return AArch64::ADDv4i32;
8607 case AArch64::UABALv8i8_v8i16:
8608 case AArch64::SABALv16i8_v8i16:
8609 return AArch64::ADDv8i16;
8610 case AArch64::UABAv16i8:
8611 case AArch64::SABAv16i8:
8612 return AArch64::ADDv16i8;
8613 case AArch64::UABAv4i16:
8614 case AArch64::SABAv4i16:
8615 return AArch64::ADDv4i16;
8616 case AArch64::UABAv2i32:
8617 case AArch64::SABAv2i32:
8618 return AArch64::ADDv2i32;
8619 case AArch64::UABAv8i8:
8620 case AArch64::SABAv8i8:
8621 return AArch64::ADDv8i8;
8622 default:
8623 llvm_unreachable("Unknown accumulator opcode");
8624 }
8625}
8626
8627/// When getMachineCombinerPatterns() finds potential patterns,
8628/// this function generates the instructions that could replace the
8629/// original code sequence
8630void AArch64InstrInfo::genAlternativeCodeSequence(
8631 MachineInstr &Root, unsigned Pattern,
8634 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8635 MachineBasicBlock &MBB = *Root.getParent();
8636 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8637 MachineFunction &MF = *MBB.getParent();
8638 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8639
8640 MachineInstr *MUL = nullptr;
8641 const TargetRegisterClass *RC;
8642 unsigned Opc;
8643 switch (Pattern) {
8644 default:
8645 // Reassociate instructions.
8646 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8647 DelInstrs, InstrIdxForVirtReg);
8648 return;
8650 // A - (B + C)
8651 // ==> (A - B) - C
8652 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8653 InstrIdxForVirtReg);
8654 return;
8656 // A - (B + C)
8657 // ==> (A - C) - B
8658 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8659 InstrIdxForVirtReg);
8660 return;
8663 // MUL I=A,B,0
8664 // ADD R,I,C
8665 // ==> MADD R,A,B,C
8666 // --- Create(MADD);
8668 Opc = AArch64::MADDWrrr;
8669 RC = &AArch64::GPR32RegClass;
8670 } else {
8671 Opc = AArch64::MADDXrrr;
8672 RC = &AArch64::GPR64RegClass;
8673 }
8674 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8675 break;
8678 // MUL I=A,B,0
8679 // ADD R,C,I
8680 // ==> MADD R,A,B,C
8681 // --- Create(MADD);
8683 Opc = AArch64::MADDWrrr;
8684 RC = &AArch64::GPR32RegClass;
8685 } else {
8686 Opc = AArch64::MADDXrrr;
8687 RC = &AArch64::GPR64RegClass;
8688 }
8689 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8690 break;
8695 // MUL I=A,B,0
8696 // ADD/SUB R,I,Imm
8697 // ==> MOV V, Imm/-Imm
8698 // ==> MADD R,A,B,V
8699 // --- Create(MADD);
8700 const TargetRegisterClass *RC;
8701 unsigned BitSize, MovImm;
8704 MovImm = AArch64::MOVi32imm;
8705 RC = &AArch64::GPR32spRegClass;
8706 BitSize = 32;
8707 Opc = AArch64::MADDWrrr;
8708 RC = &AArch64::GPR32RegClass;
8709 } else {
8710 MovImm = AArch64::MOVi64imm;
8711 RC = &AArch64::GPR64spRegClass;
8712 BitSize = 64;
8713 Opc = AArch64::MADDXrrr;
8714 RC = &AArch64::GPR64RegClass;
8715 }
8716 Register NewVR = MRI.createVirtualRegister(RC);
8717 uint64_t Imm = Root.getOperand(2).getImm();
8718
8719 if (Root.getOperand(3).isImm()) {
8720 unsigned Val = Root.getOperand(3).getImm();
8721 Imm = Imm << Val;
8722 }
8723 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8725 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8726 // Check that the immediate can be composed via a single instruction.
8728 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8729 if (Insn.size() != 1)
8730 return;
8731 MachineInstrBuilder MIB1 =
8732 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8733 .addImm(IsSub ? -Imm : Imm);
8734 InsInstrs.push_back(MIB1);
8735 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8736 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8737 break;
8738 }
8741 // MUL I=A,B,0
8742 // SUB R,I, C
8743 // ==> SUB V, 0, C
8744 // ==> MADD R,A,B,V // = -C + A*B
8745 // --- Create(MADD);
8746 const TargetRegisterClass *SubRC;
8747 unsigned SubOpc, ZeroReg;
8749 SubOpc = AArch64::SUBWrr;
8750 SubRC = &AArch64::GPR32spRegClass;
8751 ZeroReg = AArch64::WZR;
8752 Opc = AArch64::MADDWrrr;
8753 RC = &AArch64::GPR32RegClass;
8754 } else {
8755 SubOpc = AArch64::SUBXrr;
8756 SubRC = &AArch64::GPR64spRegClass;
8757 ZeroReg = AArch64::XZR;
8758 Opc = AArch64::MADDXrrr;
8759 RC = &AArch64::GPR64RegClass;
8760 }
8761 Register NewVR = MRI.createVirtualRegister(SubRC);
8762 // SUB NewVR, 0, C
8763 MachineInstrBuilder MIB1 =
8764 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8765 .addReg(ZeroReg)
8766 .add(Root.getOperand(2));
8767 InsInstrs.push_back(MIB1);
8768 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8769 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8770 break;
8771 }
8774 // MUL I=A,B,0
8775 // SUB R,C,I
8776 // ==> MSUB R,A,B,C (computes C - A*B)
8777 // --- Create(MSUB);
8779 Opc = AArch64::MSUBWrrr;
8780 RC = &AArch64::GPR32RegClass;
8781 } else {
8782 Opc = AArch64::MSUBXrrr;
8783 RC = &AArch64::GPR64RegClass;
8784 }
8785 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8786 break;
8788 Opc = AArch64::MLAv8i8;
8789 RC = &AArch64::FPR64RegClass;
8790 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8791 break;
8793 Opc = AArch64::MLAv8i8;
8794 RC = &AArch64::FPR64RegClass;
8795 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8796 break;
8798 Opc = AArch64::MLAv16i8;
8799 RC = &AArch64::FPR128RegClass;
8800 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8801 break;
8803 Opc = AArch64::MLAv16i8;
8804 RC = &AArch64::FPR128RegClass;
8805 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8806 break;
8808 Opc = AArch64::MLAv4i16;
8809 RC = &AArch64::FPR64RegClass;
8810 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8811 break;
8813 Opc = AArch64::MLAv4i16;
8814 RC = &AArch64::FPR64RegClass;
8815 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8816 break;
8818 Opc = AArch64::MLAv8i16;
8819 RC = &AArch64::FPR128RegClass;
8820 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8821 break;
8823 Opc = AArch64::MLAv8i16;
8824 RC = &AArch64::FPR128RegClass;
8825 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8826 break;
8828 Opc = AArch64::MLAv2i32;
8829 RC = &AArch64::FPR64RegClass;
8830 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8831 break;
8833 Opc = AArch64::MLAv2i32;
8834 RC = &AArch64::FPR64RegClass;
8835 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8836 break;
8838 Opc = AArch64::MLAv4i32;
8839 RC = &AArch64::FPR128RegClass;
8840 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8841 break;
8843 Opc = AArch64::MLAv4i32;
8844 RC = &AArch64::FPR128RegClass;
8845 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8846 break;
8847
8849 Opc = AArch64::MLAv8i8;
8850 RC = &AArch64::FPR64RegClass;
8851 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8852 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8853 RC);
8854 break;
8856 Opc = AArch64::MLSv8i8;
8857 RC = &AArch64::FPR64RegClass;
8858 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8859 break;
8861 Opc = AArch64::MLAv16i8;
8862 RC = &AArch64::FPR128RegClass;
8863 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8864 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8865 RC);
8866 break;
8868 Opc = AArch64::MLSv16i8;
8869 RC = &AArch64::FPR128RegClass;
8870 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8871 break;
8873 Opc = AArch64::MLAv4i16;
8874 RC = &AArch64::FPR64RegClass;
8875 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8876 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8877 RC);
8878 break;
8880 Opc = AArch64::MLSv4i16;
8881 RC = &AArch64::FPR64RegClass;
8882 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8883 break;
8885 Opc = AArch64::MLAv8i16;
8886 RC = &AArch64::FPR128RegClass;
8887 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8888 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8889 RC);
8890 break;
8892 Opc = AArch64::MLSv8i16;
8893 RC = &AArch64::FPR128RegClass;
8894 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8895 break;
8897 Opc = AArch64::MLAv2i32;
8898 RC = &AArch64::FPR64RegClass;
8899 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8900 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8901 RC);
8902 break;
8904 Opc = AArch64::MLSv2i32;
8905 RC = &AArch64::FPR64RegClass;
8906 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8907 break;
8909 Opc = AArch64::MLAv4i32;
8910 RC = &AArch64::FPR128RegClass;
8911 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8912 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8913 RC);
8914 break;
8916 Opc = AArch64::MLSv4i32;
8917 RC = &AArch64::FPR128RegClass;
8918 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8919 break;
8920
8922 Opc = AArch64::MLAv4i16_indexed;
8923 RC = &AArch64::FPR64RegClass;
8924 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8925 break;
8927 Opc = AArch64::MLAv4i16_indexed;
8928 RC = &AArch64::FPR64RegClass;
8929 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8930 break;
8932 Opc = AArch64::MLAv8i16_indexed;
8933 RC = &AArch64::FPR128RegClass;
8934 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8935 break;
8937 Opc = AArch64::MLAv8i16_indexed;
8938 RC = &AArch64::FPR128RegClass;
8939 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8940 break;
8942 Opc = AArch64::MLAv2i32_indexed;
8943 RC = &AArch64::FPR64RegClass;
8944 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8945 break;
8947 Opc = AArch64::MLAv2i32_indexed;
8948 RC = &AArch64::FPR64RegClass;
8949 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8950 break;
8952 Opc = AArch64::MLAv4i32_indexed;
8953 RC = &AArch64::FPR128RegClass;
8954 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8955 break;
8957 Opc = AArch64::MLAv4i32_indexed;
8958 RC = &AArch64::FPR128RegClass;
8959 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8960 break;
8961
8963 Opc = AArch64::MLAv4i16_indexed;
8964 RC = &AArch64::FPR64RegClass;
8965 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8966 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8967 RC);
8968 break;
8970 Opc = AArch64::MLSv4i16_indexed;
8971 RC = &AArch64::FPR64RegClass;
8972 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8973 break;
8975 Opc = AArch64::MLAv8i16_indexed;
8976 RC = &AArch64::FPR128RegClass;
8977 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8978 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8979 RC);
8980 break;
8982 Opc = AArch64::MLSv8i16_indexed;
8983 RC = &AArch64::FPR128RegClass;
8984 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8985 break;
8987 Opc = AArch64::MLAv2i32_indexed;
8988 RC = &AArch64::FPR64RegClass;
8989 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8990 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8991 RC);
8992 break;
8994 Opc = AArch64::MLSv2i32_indexed;
8995 RC = &AArch64::FPR64RegClass;
8996 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8997 break;
8999 Opc = AArch64::MLAv4i32_indexed;
9000 RC = &AArch64::FPR128RegClass;
9001 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9002 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9003 RC);
9004 break;
9006 Opc = AArch64::MLSv4i32_indexed;
9007 RC = &AArch64::FPR128RegClass;
9008 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9009 break;
9010
9011 // Floating Point Support
9013 Opc = AArch64::FMADDHrrr;
9014 RC = &AArch64::FPR16RegClass;
9015 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9016 break;
9018 Opc = AArch64::FMADDSrrr;
9019 RC = &AArch64::FPR32RegClass;
9020 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9021 break;
9023 Opc = AArch64::FMADDDrrr;
9024 RC = &AArch64::FPR64RegClass;
9025 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9026 break;
9027
9029 Opc = AArch64::FMADDHrrr;
9030 RC = &AArch64::FPR16RegClass;
9031 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9032 break;
9034 Opc = AArch64::FMADDSrrr;
9035 RC = &AArch64::FPR32RegClass;
9036 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9037 break;
9039 Opc = AArch64::FMADDDrrr;
9040 RC = &AArch64::FPR64RegClass;
9041 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9042 break;
9043
9045 Opc = AArch64::FMLAv1i32_indexed;
9046 RC = &AArch64::FPR32RegClass;
9047 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9049 break;
9051 Opc = AArch64::FMLAv1i32_indexed;
9052 RC = &AArch64::FPR32RegClass;
9053 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9055 break;
9056
9058 Opc = AArch64::FMLAv1i64_indexed;
9059 RC = &AArch64::FPR64RegClass;
9060 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9062 break;
9064 Opc = AArch64::FMLAv1i64_indexed;
9065 RC = &AArch64::FPR64RegClass;
9066 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9068 break;
9069
9071 RC = &AArch64::FPR64RegClass;
9072 Opc = AArch64::FMLAv4i16_indexed;
9073 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9075 break;
9077 RC = &AArch64::FPR64RegClass;
9078 Opc = AArch64::FMLAv4f16;
9079 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9081 break;
9083 RC = &AArch64::FPR64RegClass;
9084 Opc = AArch64::FMLAv4i16_indexed;
9085 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9087 break;
9089 RC = &AArch64::FPR64RegClass;
9090 Opc = AArch64::FMLAv4f16;
9091 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9093 break;
9094
9097 RC = &AArch64::FPR64RegClass;
9099 Opc = AArch64::FMLAv2i32_indexed;
9100 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9102 } else {
9103 Opc = AArch64::FMLAv2f32;
9104 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9106 }
9107 break;
9110 RC = &AArch64::FPR64RegClass;
9112 Opc = AArch64::FMLAv2i32_indexed;
9113 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9115 } else {
9116 Opc = AArch64::FMLAv2f32;
9117 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9119 }
9120 break;
9121
9123 RC = &AArch64::FPR128RegClass;
9124 Opc = AArch64::FMLAv8i16_indexed;
9125 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9127 break;
9129 RC = &AArch64::FPR128RegClass;
9130 Opc = AArch64::FMLAv8f16;
9131 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9133 break;
9135 RC = &AArch64::FPR128RegClass;
9136 Opc = AArch64::FMLAv8i16_indexed;
9137 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9139 break;
9141 RC = &AArch64::FPR128RegClass;
9142 Opc = AArch64::FMLAv8f16;
9143 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9145 break;
9146
9149 RC = &AArch64::FPR128RegClass;
9151 Opc = AArch64::FMLAv2i64_indexed;
9152 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9154 } else {
9155 Opc = AArch64::FMLAv2f64;
9156 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9158 }
9159 break;
9162 RC = &AArch64::FPR128RegClass;
9164 Opc = AArch64::FMLAv2i64_indexed;
9165 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9167 } else {
9168 Opc = AArch64::FMLAv2f64;
9169 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9171 }
9172 break;
9173
9176 RC = &AArch64::FPR128RegClass;
9178 Opc = AArch64::FMLAv4i32_indexed;
9179 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9181 } else {
9182 Opc = AArch64::FMLAv4f32;
9183 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9185 }
9186 break;
9187
9190 RC = &AArch64::FPR128RegClass;
9192 Opc = AArch64::FMLAv4i32_indexed;
9193 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9195 } else {
9196 Opc = AArch64::FMLAv4f32;
9197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9199 }
9200 break;
9201
9203 Opc = AArch64::FNMSUBHrrr;
9204 RC = &AArch64::FPR16RegClass;
9205 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9206 break;
9208 Opc = AArch64::FNMSUBSrrr;
9209 RC = &AArch64::FPR32RegClass;
9210 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9211 break;
9213 Opc = AArch64::FNMSUBDrrr;
9214 RC = &AArch64::FPR64RegClass;
9215 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9216 break;
9217
9219 Opc = AArch64::FNMADDHrrr;
9220 RC = &AArch64::FPR16RegClass;
9221 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9222 break;
9224 Opc = AArch64::FNMADDSrrr;
9225 RC = &AArch64::FPR32RegClass;
9226 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9227 break;
9229 Opc = AArch64::FNMADDDrrr;
9230 RC = &AArch64::FPR64RegClass;
9231 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9232 break;
9233
9235 Opc = AArch64::FMSUBHrrr;
9236 RC = &AArch64::FPR16RegClass;
9237 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9238 break;
9240 Opc = AArch64::FMSUBSrrr;
9241 RC = &AArch64::FPR32RegClass;
9242 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9243 break;
9245 Opc = AArch64::FMSUBDrrr;
9246 RC = &AArch64::FPR64RegClass;
9247 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9248 break;
9249
9251 Opc = AArch64::FMLSv1i32_indexed;
9252 RC = &AArch64::FPR32RegClass;
9253 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9255 break;
9256
9258 Opc = AArch64::FMLSv1i64_indexed;
9259 RC = &AArch64::FPR64RegClass;
9260 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9262 break;
9263
9266 RC = &AArch64::FPR64RegClass;
9267 Register NewVR = MRI.createVirtualRegister(RC);
9268 MachineInstrBuilder MIB1 =
9269 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9270 .add(Root.getOperand(2));
9271 InsInstrs.push_back(MIB1);
9272 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9274 Opc = AArch64::FMLAv4f16;
9275 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9276 FMAInstKind::Accumulator, &NewVR);
9277 } else {
9278 Opc = AArch64::FMLAv4i16_indexed;
9279 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9280 FMAInstKind::Indexed, &NewVR);
9281 }
9282 break;
9283 }
9285 RC = &AArch64::FPR64RegClass;
9286 Opc = AArch64::FMLSv4f16;
9287 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9289 break;
9291 RC = &AArch64::FPR64RegClass;
9292 Opc = AArch64::FMLSv4i16_indexed;
9293 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9295 break;
9296
9299 RC = &AArch64::FPR64RegClass;
9301 Opc = AArch64::FMLSv2i32_indexed;
9302 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9304 } else {
9305 Opc = AArch64::FMLSv2f32;
9306 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9308 }
9309 break;
9310
9313 RC = &AArch64::FPR128RegClass;
9314 Register NewVR = MRI.createVirtualRegister(RC);
9315 MachineInstrBuilder MIB1 =
9316 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9317 .add(Root.getOperand(2));
9318 InsInstrs.push_back(MIB1);
9319 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9321 Opc = AArch64::FMLAv8f16;
9322 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9323 FMAInstKind::Accumulator, &NewVR);
9324 } else {
9325 Opc = AArch64::FMLAv8i16_indexed;
9326 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9327 FMAInstKind::Indexed, &NewVR);
9328 }
9329 break;
9330 }
9332 RC = &AArch64::FPR128RegClass;
9333 Opc = AArch64::FMLSv8f16;
9334 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9336 break;
9338 RC = &AArch64::FPR128RegClass;
9339 Opc = AArch64::FMLSv8i16_indexed;
9340 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9342 break;
9343
9346 RC = &AArch64::FPR128RegClass;
9348 Opc = AArch64::FMLSv2i64_indexed;
9349 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9351 } else {
9352 Opc = AArch64::FMLSv2f64;
9353 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9355 }
9356 break;
9357
9360 RC = &AArch64::FPR128RegClass;
9362 Opc = AArch64::FMLSv4i32_indexed;
9363 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9365 } else {
9366 Opc = AArch64::FMLSv4f32;
9367 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9369 }
9370 break;
9373 RC = &AArch64::FPR64RegClass;
9374 Register NewVR = MRI.createVirtualRegister(RC);
9375 MachineInstrBuilder MIB1 =
9376 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9377 .add(Root.getOperand(2));
9378 InsInstrs.push_back(MIB1);
9379 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9381 Opc = AArch64::FMLAv2i32_indexed;
9382 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9383 FMAInstKind::Indexed, &NewVR);
9384 } else {
9385 Opc = AArch64::FMLAv2f32;
9386 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9387 FMAInstKind::Accumulator, &NewVR);
9388 }
9389 break;
9390 }
9393 RC = &AArch64::FPR128RegClass;
9394 Register NewVR = MRI.createVirtualRegister(RC);
9395 MachineInstrBuilder MIB1 =
9396 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9397 .add(Root.getOperand(2));
9398 InsInstrs.push_back(MIB1);
9399 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9401 Opc = AArch64::FMLAv4i32_indexed;
9402 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9403 FMAInstKind::Indexed, &NewVR);
9404 } else {
9405 Opc = AArch64::FMLAv4f32;
9406 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9407 FMAInstKind::Accumulator, &NewVR);
9408 }
9409 break;
9410 }
9413 RC = &AArch64::FPR128RegClass;
9414 Register NewVR = MRI.createVirtualRegister(RC);
9415 MachineInstrBuilder MIB1 =
9416 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9417 .add(Root.getOperand(2));
9418 InsInstrs.push_back(MIB1);
9419 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9421 Opc = AArch64::FMLAv2i64_indexed;
9422 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9423 FMAInstKind::Indexed, &NewVR);
9424 } else {
9425 Opc = AArch64::FMLAv2f64;
9426 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9427 FMAInstKind::Accumulator, &NewVR);
9428 }
9429 break;
9430 }
9433 unsigned IdxDupOp =
9435 : 2;
9436 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9437 &AArch64::FPR128RegClass, MRI);
9438 break;
9439 }
9442 unsigned IdxDupOp =
9444 : 2;
9445 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9446 &AArch64::FPR128RegClass, MRI);
9447 break;
9448 }
9451 unsigned IdxDupOp =
9453 : 2;
9454 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9455 &AArch64::FPR128_loRegClass, MRI);
9456 break;
9457 }
9460 unsigned IdxDupOp =
9462 : 2;
9463 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9464 &AArch64::FPR128RegClass, MRI);
9465 break;
9466 }
9469 unsigned IdxDupOp =
9471 : 2;
9472 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9473 &AArch64::FPR128_loRegClass, MRI);
9474 break;
9475 }
9477 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9478 break;
9479 }
9481 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9482 Pattern, 4);
9483 break;
9484 }
9486 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9487 Pattern, 8);
9488 break;
9489 }
9491 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9492 Pattern, 16);
9493 break;
9494 }
9495
9496 } // end switch (Pattern)
9497 // Record MUL and ADD/SUB for deletion
9498 if (MUL)
9499 DelInstrs.push_back(MUL);
9500 DelInstrs.push_back(&Root);
9501
9502 // Set the flags on the inserted instructions to be the merged flags of the
9503 // instructions that we have combined.
9504 uint32_t Flags = Root.getFlags();
9505 if (MUL)
9506 Flags = Root.mergeFlagsWith(*MUL);
9507 for (auto *MI : InsInstrs)
9508 MI->setFlags(Flags);
9509}
9510
9511/// Replace a csinc-branch sequence with a simple conditional branch
9512///
9513/// Examples:
9514/// 1. \code
9515/// csinc w9, wzr, wzr, <condition code>
9516/// tbnz w9, #0, 0x44
9517/// \endcode
9518/// to
9519/// \code
9520/// b.<inverted condition code>
9521/// \endcode
9522///
9523/// 2. \code
9524/// csinc w9, wzr, wzr, <condition code>
9525/// tbz w9, #0, 0x44
9526/// \endcode
9527/// to
9528/// \code
9529/// b.<condition code>
9530/// \endcode
9531///
9532/// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when the
9533/// compare's constant operand is a power of 2.
9534///
9535/// Examples:
9536/// \code
9537/// and w8, w8, #0x400
9538/// cbnz w8, L1
9539/// \endcode
9540/// to
9541/// \code
9542/// tbnz w8, #10, L1
9543/// \endcode
9544///
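/// Worked example (illustrative): for the `and w8, w8, #0x400` case above, the
/// decoded mask is 0x400 == 1 << 10, so the test bit is Log2_64(0x400) == 10
/// and the rewritten branch is `tbnz w8, #10, L1`; because 10 < 32, the 32-bit
/// TBNZW form is used.
///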
9545/// \param MI Conditional Branch
9546/// \return True when the simple conditional branch is generated
9547///
9548bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9549 bool IsNegativeBranch = false;
9550 bool IsTestAndBranch = false;
9551 unsigned TargetBBInMI = 0;
9552 switch (MI.getOpcode()) {
9553 default:
9554 llvm_unreachable("Unknown branch instruction?");
9555 case AArch64::Bcc:
9556 case AArch64::CBWPri:
9557 case AArch64::CBXPri:
9558 case AArch64::CBBAssertExt:
9559 case AArch64::CBHAssertExt:
9560 case AArch64::CBWPrr:
9561 case AArch64::CBXPrr:
9562 return false;
9563 case AArch64::CBZW:
9564 case AArch64::CBZX:
9565 TargetBBInMI = 1;
9566 break;
9567 case AArch64::CBNZW:
9568 case AArch64::CBNZX:
9569 TargetBBInMI = 1;
9570 IsNegativeBranch = true;
9571 break;
9572 case AArch64::TBZW:
9573 case AArch64::TBZX:
9574 TargetBBInMI = 2;
9575 IsTestAndBranch = true;
9576 break;
9577 case AArch64::TBNZW:
9578 case AArch64::TBNZX:
9579 TargetBBInMI = 2;
9580 IsNegativeBranch = true;
9581 IsTestAndBranch = true;
9582 break;
9583 }
9584 // So we increment a zero register and test for bits other
9585 // than bit 0? Conservatively bail out in case the verifier
9586 // missed this case.
9587 if (IsTestAndBranch && MI.getOperand(1).getImm())
9588 return false;
9589
9590 // Find Definition.
9591 assert(MI.getParent() && "Incomplete machine instruction\n");
9592 MachineBasicBlock *MBB = MI.getParent();
9593 MachineFunction *MF = MBB->getParent();
9594 MachineRegisterInfo *MRI = &MF->getRegInfo();
9595 Register VReg = MI.getOperand(0).getReg();
9596 if (!VReg.isVirtual())
9597 return false;
9598
9599 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9600
9601 // Look through COPY instructions to find definition.
9602 while (DefMI->isCopy()) {
9603 Register CopyVReg = DefMI->getOperand(1).getReg();
9604 if (!MRI->hasOneNonDBGUse(CopyVReg))
9605 return false;
9606 if (!MRI->hasOneDef(CopyVReg))
9607 return false;
9608 DefMI = MRI->getVRegDef(CopyVReg);
9609 }
9610
9611 switch (DefMI->getOpcode()) {
9612 default:
9613 return false;
9614 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9615 case AArch64::ANDWri:
9616 case AArch64::ANDXri: {
9617 if (IsTestAndBranch)
9618 return false;
9619 if (DefMI->getParent() != MBB)
9620 return false;
9621 if (!MRI->hasOneNonDBGUse(VReg))
9622 return false;
9623
9624 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9625 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9626 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9627 if (!isPowerOf2_64(Mask))
9628 return false;
9629
9630 MachineOperand &MO = DefMI->getOperand(1);
9631 Register NewReg = MO.getReg();
9632 if (!NewReg.isVirtual())
9633 return false;
9634
9635 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9636
9637 MachineBasicBlock &RefToMBB = *MBB;
9638 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9639 DebugLoc DL = MI.getDebugLoc();
9640 unsigned Imm = Log2_64(Mask);
9641 unsigned Opc = (Imm < 32)
9642 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9643 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9644 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9645 .addReg(NewReg)
9646 .addImm(Imm)
9647 .addMBB(TBB);
9648 // Register lives on to the TB(N)Z now.
9649 MO.setIsKill(false);
9650
9651 // For immediates smaller than 32, we must use the 32-bit
9652 // variant (W) in all cases; the 64-bit variant cannot
9653 // encode them.
9654 // Therefore, if the input register is 64-bit, we need to take its
9655 // 32-bit sub-register.
9656 if (!Is32Bit && Imm < 32)
9657 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9658 MI.eraseFromParent();
9659 return true;
9660 }
9661 // Look for CSINC
9662 case AArch64::CSINCWr:
9663 case AArch64::CSINCXr: {
9664 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9665 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9666 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9667 DefMI->getOperand(2).getReg() == AArch64::XZR))
9668 return false;
9669
9670 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9671 true) != -1)
9672 return false;
9673
9674 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9675 // Convert only when the condition code is not modified between
9676 // the CSINC and the branch. The CC may be used by other
9677 // instructions in between.
9679 return false;
9680 MachineBasicBlock &RefToMBB = *MBB;
9681 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9682 DebugLoc DL = MI.getDebugLoc();
9683 if (IsNegativeBranch)
9684 CC = AArch64CC::getInvertedCondCode(CC);
9685 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9686 MI.eraseFromParent();
9687 return true;
9688 }
9689 }
9690}
9691
9692std::pair<unsigned, unsigned>
9693AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9694 const unsigned Mask = AArch64II::MO_FRAGMENT;
9695 return std::make_pair(TF & Mask, TF & ~Mask);
9696}
9697
9699AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9700 using namespace AArch64II;
9701
9702 static const std::pair<unsigned, const char *> TargetFlags[] = {
9703 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9704 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9705 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9706 {MO_HI12, "aarch64-hi12"}};
9707 return ArrayRef(TargetFlags);
9708}
9709
9711AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9712 using namespace AArch64II;
9713
9714 static const std::pair<unsigned, const char *> TargetFlags[] = {
9715 {MO_COFFSTUB, "aarch64-coffstub"},
9716 {MO_GOT, "aarch64-got"},
9717 {MO_NC, "aarch64-nc"},
9718 {MO_S, "aarch64-s"},
9719 {MO_TLS, "aarch64-tls"},
9720 {MO_DLLIMPORT, "aarch64-dllimport"},
9721 {MO_PREL, "aarch64-prel"},
9722 {MO_TAGGED, "aarch64-tagged"},
9723 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9724 };
9725 return ArrayRef(TargetFlags);
9726}
9727
9729AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9730 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9731 {{MOSuppressPair, "aarch64-suppress-pair"},
9732 {MOStridedAccess, "aarch64-strided-access"}};
9733 return ArrayRef(TargetFlags);
9734}
9735
9736/// Constants defining how certain sequences should be outlined.
9737/// This encompasses how an outlined function should be called, and what kind of
9738/// frame should be emitted for that outlined function.
9739///
9740/// \p MachineOutlinerDefault implies that the function should be called with
9741/// a save and restore of LR to the stack.
9742///
9743/// That is,
9744///
9745/// I1 Save LR OUTLINED_FUNCTION:
9746/// I2 --> BL OUTLINED_FUNCTION I1
9747/// I3 Restore LR I2
9748/// I3
9749/// RET
9750///
9751/// * Call construction overhead: 3 (save + BL + restore)
9752/// * Frame construction overhead: 1 (ret)
9753/// * Requires stack fixups? Yes
9754///
9755/// \p MachineOutlinerTailCall implies that the function is being created from
9756/// a sequence of instructions ending in a return.
9757///
9758/// That is,
9759///
9760/// I1 OUTLINED_FUNCTION:
9761/// I2 --> B OUTLINED_FUNCTION I1
9762/// RET I2
9763/// RET
9764///
9765/// * Call construction overhead: 1 (B)
9766/// * Frame construction overhead: 0 (Return included in sequence)
9767/// * Requires stack fixups? No
9768///
9769/// \p MachineOutlinerNoLRSave implies that the function should be called using
9770/// a BL instruction, but doesn't require LR to be saved and restored. This
9771/// happens when LR is known to be dead.
9772///
9773/// That is,
9774///
9775/// I1 OUTLINED_FUNCTION:
9776/// I2 --> BL OUTLINED_FUNCTION I1
9777/// I3 I2
9778/// I3
9779/// RET
9780///
9781/// * Call construction overhead: 1 (BL)
9782/// * Frame construction overhead: 1 (RET)
9783/// * Requires stack fixups? No
9784///
9785/// \p MachineOutlinerThunk implies that the function is being created from
9786/// a sequence of instructions ending in a call. The outlined function is
9787/// called with a BL instruction, and the outlined function tail-calls the
9788/// original call destination.
9789///
9790/// That is,
9791///
9792/// I1 OUTLINED_FUNCTION:
9793/// I2 --> BL OUTLINED_FUNCTION I1
9794/// BL f I2
9795/// B f
9796/// * Call construction overhead: 1 (BL)
9797/// * Frame construction overhead: 0
9798/// * Requires stack fixups? No
9799///
9800/// \p MachineOutlinerRegSave implies that the function should be called with a
9801/// save and restore of LR to an available register. This allows us to avoid
9802/// stack fixups. Note that this outlining variant is compatible with the
9803/// NoLRSave case.
9804///
9805/// That is,
9806///
9807/// I1 Save LR OUTLINED_FUNCTION:
9808/// I2 --> BL OUTLINED_FUNCTION I1
9809/// I3 Restore LR I2
9810/// I3
9811/// RET
9812///
9813/// * Call construction overhead: 3 (save + BL + restore)
9814/// * Frame construction overhead: 1 (ret)
9815/// * Requires stack fixups? No
9817 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9818 MachineOutlinerTailCall, /// Only emit a branch.
9819 MachineOutlinerNoLRSave, /// Emit a call and return.
9820 MachineOutlinerThunk, /// Emit a call and tail-call.
9821 MachineOutlinerRegSave /// Same as default, but save to a register.
9822};
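// Illustrative cost sketch based on the per-variant overheads documented
// above (4 bytes per instruction): outlining an N-instruction sequence from C
// call sites with MachineOutlinerDefault emits roughly 3*C call-site
// instructions plus N+1 for the outlined body, versus C*N instructions
// inline. For N = 4 and C = 3 that is 3*3 + 5 = 14 against 12 inline, so
// outlining is not profitable; at C = 10 it is 35 against 40, so it is.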
9823
9829
9831AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9832 MachineFunction *MF = C.getMF();
9833 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9834 const AArch64RegisterInfo *ARI =
9835 static_cast<const AArch64RegisterInfo *>(&TRI);
9836 // Check if there is an available register across the sequence that we can
9837 // use.
9838 for (unsigned Reg : AArch64::GPR64RegClass) {
9839 if (!ARI->isReservedReg(*MF, Reg) &&
9840 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9841 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9842 Reg != AArch64::X17 && // Ditto for X17.
9843 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9844 C.isAvailableInsideSeq(Reg, TRI))
9845 return Reg;
9846 }
9847 return Register();
9848}
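// Illustrative use: if, say, x20 is unreserved and free both inside and
// across a candidate, the call can be built as
//   mov x20, lr ; bl OUTLINED_FUNCTION ; mov lr, x20
// which is the 3-instruction (12-byte) MachineOutlinerRegSave overhead
// accounted for in getOutliningCandidateInfo below.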
9849
9850static bool
9851outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9852 const outliner::Candidate &b) {
9853 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9854 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9855
9856 return MFIa->getSignReturnAddressCondition() ==
9857 MFIb->getSignReturnAddressCondition();
9858}
9859
9860static bool
9861outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9862 const outliner::Candidate &b) {
9863 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9864 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9865
9866 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9867}
9868
9869static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9870 const outliner::Candidate &b) {
9871 const AArch64Subtarget &SubtargetA =
9872 a.getMF()->getSubtarget<AArch64Subtarget>();
9873 const AArch64Subtarget &SubtargetB =
9874 b.getMF()->getSubtarget<AArch64Subtarget>();
9875 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9876}
9877
9878std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9879AArch64InstrInfo::getOutliningCandidateInfo(
9880 const MachineModuleInfo &MMI,
9881 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9882 unsigned MinRepeats) const {
9883 unsigned SequenceSize = 0;
9884 for (auto &MI : RepeatedSequenceLocs[0])
9885 SequenceSize += getInstSizeInBytes(MI);
9886
9887 unsigned NumBytesToCreateFrame = 0;
9888
9889 // Avoid splitting an ADRP and its paired ADD/LDR across outlined functions.
9890 // These instructions are fused together by the scheduler.
9891 // Any candidate where ADRP is the last instruction should be rejected,
9892 // as that would split the ADRP pair.
9893 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
9894 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
9895 if (LastMI.getOpcode() == AArch64::ADRP &&
9896 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
9897 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9898 return std::nullopt;
9899 }
9900
9901 // Similarly any candidate where the first instruction is ADD/LDR with a
9902 // page offset should be rejected to avoid ADRP splitting.
9903 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
9904 FirstMI.getOpcode() == AArch64::LDRXui) &&
9905 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
9906 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9907 return std::nullopt;
9908 }
9909
9910 // We only allow outlining for functions having exactly matching return
9911 // address signing attributes, i.e., all share the same value for the
9912 // attribute "sign-return-address" and all share the same type of key they
9913 // are signed with.
9914 // Additionally we require all functions to simultaneously either support
9915 // v8.3a features or not. Otherwise an outlined function could get signed
9916 // using dedicated v8.3 instructions and a call from a function that doesn't
9917 // support v8.3 instructions would therefore be invalid.
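// For example, a candidate taken from a function compiled with
// -mbranch-protection=pac-ret (so "sign-return-address" is set) must not be
// grouped with an otherwise identical candidate from a function compiled
// without it; the adjacent_find below rejects such mixed sets.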
9918 if (std::adjacent_find(
9919 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9920 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9921 // Return true if a and b are non-equal w.r.t. return address
9922 // signing or support of v8.3a features
9923 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9924 outliningCandidatesSigningKeyConsensus(a, b) &&
9925 outliningCandidatesV8_3OpsConsensus(a, b)) {
9926 return false;
9927 }
9928 return true;
9929 }) != RepeatedSequenceLocs.end()) {
9930 return std::nullopt;
9931 }
9932
9933 // Since at this point all candidates agree on their return address signing
9934 // picking just one is fine. If the candidate functions potentially sign their
9935 // return addresses, the outlined function should do the same. Note that in
9936 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9937 // not certainly true that the outlined function will have to sign its return
9938 // address but this decision is made later, when the decision to outline
9939 // has already been made.
9940 // The same holds for the number of additional instructions we need: On
9941 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9942 // necessary. However, at this point we don't know if the outlined function
9943 // will have a RET instruction so we assume the worst.
9944 const TargetRegisterInfo &TRI = getRegisterInfo();
9945 // Performing a tail call may require extra checks when PAuth is enabled.
9946 // If PAuth is disabled, set it to zero for uniformity.
9947 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9948 const auto RASignCondition = RepeatedSequenceLocs[0]
9949 .getMF()
9950 ->getInfo<AArch64FunctionInfo>()
9951 ->getSignReturnAddressCondition();
9952 if (RASignCondition != SignReturnAddress::None) {
9954 // One PAC and one AUT instruction.
9954 NumBytesToCreateFrame += 8;
9955
9956 // PAuth is enabled - set extra tail call cost, if any.
9957 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9958 *RepeatedSequenceLocs[0].getMF());
9960 NumBytesToCheckLRInTCEpilogue =
9961 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
9961 // Checking the authenticated LR value may significantly impact
9962 // SequenceSize, so account for it for more precise results.
9963 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9964 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9965
9966 // We have to check if sp modifying instructions would get outlined.
9967 // If so, we only allow outlining if sp is unchanged overall, so matching
9968 // sub and add instructions are okay to outline; all other sp modifications
9969 // are not.
9970 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9971 int SPValue = 0;
9972 for (auto &MI : C) {
9973 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9974 switch (MI.getOpcode()) {
9975 case AArch64::ADDXri:
9976 case AArch64::ADDWri:
9977 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9978 assert(MI.getOperand(2).isImm() &&
9979 "Expected operand to be immediate");
9980 assert(MI.getOperand(1).isReg() &&
9981 "Expected operand to be a register");
9982 // Check if the add just increments sp. If so, we search for
9983 // matching sub instructions that decrement sp. If not, the
9984 // modification is illegal
9985 if (MI.getOperand(1).getReg() == AArch64::SP)
9986 SPValue += MI.getOperand(2).getImm();
9987 else
9988 return true;
9989 break;
9990 case AArch64::SUBXri:
9991 case AArch64::SUBWri:
9992 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9993 assert(MI.getOperand(2).isImm() &&
9994 "Expected operand to be immediate");
9995 assert(MI.getOperand(1).isReg() &&
9996 "Expected operand to be a register");
9997 // Check if the sub just decrements sp. If so, we search for
9998 // matching add instructions that increment sp. If not, the
9999 // modification is illegal
10000 if (MI.getOperand(1).getReg() == AArch64::SP)
10001 SPValue -= MI.getOperand(2).getImm();
10002 else
10003 return true;
10004 break;
10005 default:
10006 return true;
10007 }
10008 }
10009 }
10010 if (SPValue)
10011 return true;
10012 return false;
10013 };
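// Illustrative example: a candidate containing the balanced pair
//   sub sp, sp, #16 ... add sp, sp, #16
// nets SPValue == 0 and is kept, whereas an unmatched `add sp, sp, #16` or a
// copy such as `mov sp, x29` makes the lambda return true and the candidate
// is erased below.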
10014 // Remove candidates with illegal stack modifying instructions
10015 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
10016
10017 // If the sequence doesn't have enough candidates left, then we're done.
10018 if (RepeatedSequenceLocs.size() < MinRepeats)
10019 return std::nullopt;
10020 }
10021
10022 // Properties about candidate MBBs that hold for all of them.
10023 unsigned FlagsSetInAll = 0xF;
10024
10025 // Compute liveness information for each candidate, and set FlagsSetInAll.
10026 for (outliner::Candidate &C : RepeatedSequenceLocs)
10027 FlagsSetInAll &= C.Flags;
10028
10029 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
10030
10031 // Helper lambda which sets call information for every candidate.
10032 auto SetCandidateCallInfo =
10033 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
10034 for (outliner::Candidate &C : RepeatedSequenceLocs)
10035 C.setCallInfo(CallID, NumBytesForCall);
10036 };
10037
10038 unsigned FrameID = MachineOutlinerDefault;
10039 NumBytesToCreateFrame += 4;
10040
10041 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
10042 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
10043 });
10044
10045 // We check to see if CFI Instructions are present, and if they are
10046 // we find the number of CFI Instructions in the candidates.
10047 unsigned CFICount = 0;
10048 for (auto &I : RepeatedSequenceLocs[0]) {
10049 if (I.isCFIInstruction())
10050 CFICount++;
10051 }
10052
10053 // We compare the number of found CFI Instructions to the number of CFI
10054 // instructions in the parent function for each candidate. We must check this
10055 // since if we outline one of the CFI instructions in a function, we have to
10056 // outline them all for correctness. If we do not, the address offsets will be
10057 // incorrect between the two sections of the program.
10058 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10059 std::vector<MCCFIInstruction> CFIInstructions =
10060 C.getMF()->getFrameInstructions();
10061
10062 if (CFICount > 0 && CFICount != CFIInstructions.size())
10063 return std::nullopt;
10064 }
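// For example, if the candidate sequence contains one CFI instruction but its
// parent function has emitted three in total, the counts differ and outlining
// is rejected, since splitting the CFI stream across the outlined-function
// boundary would leave the recorded offsets inconsistent.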
10065
10066 // Returns true if an instruction is safe to fix up, false otherwise.
10067 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10068 if (MI.isCall())
10069 return true;
10070
10071 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10072 !MI.readsRegister(AArch64::SP, &TRI))
10073 return true;
10074
10075 // Any modification of SP will break our code to save/restore LR.
10076 // FIXME: We could handle some instructions which add a constant
10077 // offset to SP, with a bit more work.
10078 if (MI.modifiesRegister(AArch64::SP, &TRI))
10079 return false;
10080
10081 // At this point, we have a stack instruction that we might need to
10082 // fix up. We'll handle it if it's a load or store.
10083 if (MI.mayLoadOrStore()) {
10084 const MachineOperand *Base; // Filled with the base operand of MI.
10085 int64_t Offset; // Filled with the offset of MI.
10086 bool OffsetIsScalable;
10087
10088 // Does it allow us to offset the base operand and is the base the
10089 // register SP?
10090 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10091 !Base->isReg() || Base->getReg() != AArch64::SP)
10092 return false;
10093
10094 // Fix-up code below assumes byte offsets.
10095 if (OffsetIsScalable)
10096 return false;
10097
10098 // Find the minimum/maximum offset for this instruction and check
10099 // if fixing it up would be in range.
10100 int64_t MinOffset,
10101 MaxOffset; // Unscaled offsets for the instruction.
10102 // The scale to multiply the offsets by.
10103 TypeSize Scale(0U, false), DummyWidth(0U, false);
10104 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10105
10106 Offset += 16; // Update the offset to what it would be if we outlined.
10107 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10108 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10109 return false;
10110
10111 // It's in range, so we can outline it.
10112 return true;
10113 }
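// Worked example (illustrative): `ldr x0, [sp, #8]` inside the candidate
// becomes an access at sp+8+16 once the outlined frame spills LR with a
// 16-byte SP adjustment. For LDRXui the scale is 8 and the unscaled range is
// [0, 4095], i.e. byte offsets up to 32760, so the adjusted offset of 24 is
// still encodable and the instruction is safe to fix up.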
10114
10115 // FIXME: Add handling for instructions like "add x0, sp, #8".
10116
10117 // We can't fix it up, so don't outline it.
10118 return false;
10119 };
10120
10121 // True if it's possible to fix up each stack instruction in this sequence.
10122 // Important for frames/call variants that modify the stack.
10123 bool AllStackInstrsSafe =
10124 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10125
10126 // If the last instruction in any candidate is a terminator, then we should
10127 // tail call all of the candidates.
10128 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10129 FrameID = MachineOutlinerTailCall;
10130 NumBytesToCreateFrame = 0;
10131 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10132 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10133 }
10134
10135 else if (LastInstrOpcode == AArch64::BL ||
10136 ((LastInstrOpcode == AArch64::BLR ||
10137 LastInstrOpcode == AArch64::BLRNoIP) &&
10138 !HasBTI)) {
10139 // FIXME: Do we need to check if the code after this uses the value of LR?
10140 FrameID = MachineOutlinerThunk;
10141 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10142 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10143 }
10144
10145 else {
10146 // We need to decide how to emit calls + frames. We can always emit the same
10147 // frame if we don't need to save to the stack. If we have to save to the
10148 // stack, then we need a different frame.
10149 unsigned NumBytesNoStackCalls = 0;
10150 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10151
10152 // Check if we have to save LR.
10153 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10154 bool LRAvailable =
10156 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10157 : true;
10158 // If we have a noreturn caller, then we're going to be conservative and
10159 // say that we have to save LR. If we don't have a ret at the end of the
10160 // block, then we can't reason about liveness accurately.
10161 //
10162 // FIXME: We can probably do better than always disabling this in
10163 // noreturn functions by fixing up the liveness info.
10164 bool IsNoReturn =
10165 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10166
10167 // Is LR available? If so, we don't need a save.
10168 if (LRAvailable && !IsNoReturn) {
10169 NumBytesNoStackCalls += 4;
10170 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10171 CandidatesWithoutStackFixups.push_back(C);
10172 }
10173
10174 // Is an unused register available? If so, we won't modify the stack, so
10175 // we can outline with the same frame type as those that don't save LR.
10176 else if (findRegisterToSaveLRTo(C)) {
10177 NumBytesNoStackCalls += 12;
10178 C.setCallInfo(MachineOutlinerRegSave, 12);
10179 CandidatesWithoutStackFixups.push_back(C);
10180 }
10181
10182 // Is SP used in the sequence at all? If not, we don't have to modify
10183 // the stack, so we are guaranteed to get the same frame.
10184 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10185 NumBytesNoStackCalls += 12;
10186 C.setCallInfo(MachineOutlinerDefault, 12);
10187 CandidatesWithoutStackFixups.push_back(C);
10188 }
10189
10190 // If we outline this, we need to modify the stack. Pretend we don't
10191 // outline this by saving all of its bytes.
10192 else {
10193 NumBytesNoStackCalls += SequenceSize;
10194 }
10195 }
10196
10197 // If there are no places where we have to save LR, then note that we
10198 // don't have to update the stack. Otherwise, give every candidate the
10199 // default call type, as long as it's safe to do so.
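// For instance, with three candidates where two can use MachineOutlinerNoLRSave
// (4 bytes each) and one needs MachineOutlinerRegSave (12 bytes),
// NumBytesNoStackCalls is 20, which is <= 3 * 12 == 36, so the cheaper
// no-stack-fixup variants below are chosen.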
10200 if (!AllStackInstrsSafe ||
10201 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10202 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10203 FrameID = MachineOutlinerNoLRSave;
10204 if (RepeatedSequenceLocs.size() < MinRepeats)
10205 return std::nullopt;
10206 } else {
10207 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10208
10209 // Bugzilla ID: 46767
10210 // TODO: Check if fixing up the stack more than once is safe so we can
10211 // outline these.
10212 //
10213 // An outline resulting in a caller that requires stack fixups at the
10214 // callsite to a callee that also requires stack fixups can happen when
10215 // there are no available registers at the candidate callsite for a
10216 // candidate that itself also has calls.
10217 //
10218 // In other words if function_containing_sequence in the following pseudo
10219 // assembly requires that we save LR at the point of the call, but there
10220 // are no available registers: in this case we save using SP, and as a
10221 // result the SP offsets require stack fixups by multiples of 16.
10222 //
10223 // function_containing_sequence:
10224 // ...
10225 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10226 // call OUTLINED_FUNCTION_N
10227 // restore LR from SP
10228 // ...
10229 //
10230 // OUTLINED_FUNCTION_N:
10231 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10232 // ...
10233 // bl foo
10234 // restore LR from SP
10235 // ret
10236 //
10237 // Because the code to handle more than one stack fixup does not
10238 // currently have the proper checks for legality, these cases will assert
10239 // in the AArch64 MachineOutliner. This is because the code to do this
10240 // needs more hardening, testing, better checks that generated code is
10241 // legal, etc., and because it is only verified to handle a single pass of
10242 // stack fixup.
10243 //
10244 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10245 // these cases until they are known to be handled. Bugzilla 46767 is
10246 // referenced in comments at the assert site.
10247 //
10248 // To avoid asserting (or generating non-legal code on noassert builds)
10249 // we remove all candidates which would need more than one stack fixup by
10250 // pruning the cases where the candidate has calls while also having no
10251 // available LR and having no available general purpose registers to copy
10252 // LR to (i.e., one extra stack save/restore).
10253 //
10254 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10255 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10256 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10257 return (llvm::any_of(C, IsCall)) &&
10258 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10259 !findRegisterToSaveLRTo(C));
10260 });
10261 }
10262 }
10263
10264 // If we dropped all of the candidates, bail out here.
10265 if (RepeatedSequenceLocs.size() < MinRepeats)
10266 return std::nullopt;
10267 }
10268
10269 // Does every candidate's MBB contain a call? If so, then we might have a call
10270 // in the range.
10271 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10272 // Check if the range contains a call. These require a save + restore of the
10273 // link register.
10274 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10275 bool ModStackToSaveLR = false;
10276 if (any_of(drop_end(FirstCand),
10277 [](const MachineInstr &MI) { return MI.isCall(); }))
10278 ModStackToSaveLR = true;
10279
10280 // Handle the last instruction separately. If this is a tail call, then the
10281 // last instruction is a call. We don't want to save + restore in this case.
10282 // However, it could be possible that the last instruction is a call without
10283 // it being valid to tail call this sequence. We should consider this as
10284 // well.
10285 else if (FrameID != MachineOutlinerThunk &&
10286 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10287 ModStackToSaveLR = true;
10288
10289 if (ModStackToSaveLR) {
10290 // We can't fix up the stack. Bail out.
10291 if (!AllStackInstrsSafe)
10292 return std::nullopt;
10293
10294 // Save + restore LR.
10295 NumBytesToCreateFrame += 8;
10296 }
10297 }
10298
10299 // If we have CFI instructions, we can only outline if the outlined section
10300 // can be a tail call
10301 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10302 return std::nullopt;
10303
10304 return std::make_unique<outliner::OutlinedFunction>(
10305 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10306}
10307
10308void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10309 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10310 // If a bunch of candidates reach this point, they must agree on their return
10311 // address signing. It is therefore enough to just consider the signing
10312 // behaviour of one of them.
10313 const auto &CFn = Candidates.front().getMF()->getFunction();
10314
10315 if (CFn.hasFnAttribute("ptrauth-returns"))
10316 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10317 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10318 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10319 // Since all candidates belong to the same module, just copy the
10320 // function-level attributes of an arbitrary function.
10321 if (CFn.hasFnAttribute("sign-return-address"))
10322 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10323 if (CFn.hasFnAttribute("sign-return-address-key"))
10324 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10325
10326 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10327}
10328
10329bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10330 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10331 const Function &F = MF.getFunction();
10332
10333 // Can F be deduplicated by the linker? If it can, don't outline from it.
10334 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10335 return false;
10336
10337 // Don't outline from functions with section markings; the program could
10338 // expect that all the code is in the named section.
10339 // FIXME: Allow outlining from multiple functions with the same section
10340 // marking.
10341 if (F.hasSection())
10342 return false;
10343
10344 // Outlining from functions with redzones is unsafe since the outliner may
10345 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10346 // outline from it.
10347 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10348 if (!AFI || AFI->hasRedZone().value_or(true))
10349 return false;
10350
10351 // FIXME: Determine whether it is safe to outline from functions which contain
10352 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10353 // outlined together and ensure it is safe to outline with async unwind info,
10354 // required for saving & restoring VG around calls.
10355 if (AFI->hasStreamingModeChanges())
10356 return false;
10357
10358 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10360 return false;
10361
10362 // It's safe to outline from MF.
10363 return true;
10364}
10365
10367AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10368 unsigned &Flags) const {
10370 "Must track liveness!");
10372 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10373 Ranges;
10374 // According to the AArch64 Procedure Call Standard, the following are
10375 // undefined on entry/exit from a function call:
10376 //
10377 // * Registers x16, x17, (and thus w16, w17)
10378 // * Condition codes (and thus the NZCV register)
10379 //
10380 // If any of these registers are used inside or live across an outlined
10381 // function, then they may be modified later, either by the compiler or
10382 // some other tool (like the linker).
10383 //
10384 // To avoid outlining in these situations, partition each block into ranges
10385 // where these registers are dead. We will only outline from those ranges.
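// (Illustrative note: one concrete way this happens is a linker-inserted
// range-extension veneer, which is allowed to scratch x16/x17 on the way to
// an outlined callee even though the compiler itself never touches them.)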
10386 LiveRegUnits LRU(getRegisterInfo());
10387 auto AreAllUnsafeRegsDead = [&LRU]() {
10388 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10389 LRU.available(AArch64::NZCV);
10390 };
10391
10392 // We need to know if LR is live across an outlining boundary later on in
10393 // order to decide how we'll create the outlined call, frame, etc.
10394 //
10395 // It's pretty expensive to check this for *every candidate* within a block.
10396 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10397 // to compute liveness from the end of the block for O(n) candidates within
10398 // the block.
10399 //
10400 // So, to improve the average case, let's keep track of liveness from the end
10401 // of the block to the beginning of *every outlinable range*. If we know that
10402 // LR is available in every range we could outline from, then we know that
10403 // we don't need to check liveness for any candidate within that range.
10404 bool LRAvailableEverywhere = true;
10405 // Compute liveness bottom-up.
10406 LRU.addLiveOuts(MBB);
10407 // Update flags that require info about the entire MBB.
10408 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10409 if (MI.isCall() && !MI.isTerminator())
10411 };
10412 // Range: [RangeBegin, RangeEnd)
10413 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10414 unsigned RangeLen;
10415 auto CreateNewRangeStartingAt =
10416 [&RangeBegin, &RangeEnd,
10417 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10418 RangeBegin = NewBegin;
10419 RangeEnd = std::next(RangeBegin);
10420 RangeLen = 0;
10421 };
10422 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10423 // At least one unsafe register is not dead. We do not want to outline at
10424 // this point. If it is long enough to outline from and does not cross a
10425 // bundle boundary, save the range [RangeBegin, RangeEnd).
10426 if (RangeLen <= 1)
10427 return;
10428 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10429 return;
10430 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10431 return;
10432 Ranges.emplace_back(RangeBegin, RangeEnd);
10433 };
10434 // Find the first point where all unsafe registers are dead.
10435 // FIND: <safe instr> <-- end of first potential range
10436 // SKIP: <unsafe def>
10437 // SKIP: ... everything between ...
10438 // SKIP: <unsafe use>
10439 auto FirstPossibleEndPt = MBB.instr_rbegin();
10440 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10441 LRU.stepBackward(*FirstPossibleEndPt);
10442 // Update flags that impact how we outline across the entire block,
10443 // regardless of safety.
10444 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10445 if (AreAllUnsafeRegsDead())
10446 break;
10447 }
10448 // If we exhausted the entire block, we have no safe ranges to outline.
10449 if (FirstPossibleEndPt == MBB.instr_rend())
10450 return Ranges;
10451 // Current range.
10452 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10453 // FirstPossibleEndPt points to the first place where all unsafe registers
10454 // are dead (if there is any such point). Begin partitioning the MBB into
10455 // ranges.
10456 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10457 LRU.stepBackward(MI);
10458 UpdateWholeMBBFlags(MI);
10459 if (!AreAllUnsafeRegsDead()) {
10460 SaveRangeIfNonEmpty();
10461 CreateNewRangeStartingAt(MI.getIterator());
10462 continue;
10463 }
10464 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10465 RangeBegin = MI.getIterator();
10466 ++RangeLen;
10467 }
10468 // Above loop misses the last (or only) range. If we are still safe, then
10469 // let's save the range.
10470 if (AreAllUnsafeRegsDead())
10471 SaveRangeIfNonEmpty();
10472 if (Ranges.empty())
10473 return Ranges;
10474 // We found the ranges bottom-up, but the mapping expects them top-down, so
10475 // reverse the order.
10476 std::reverse(Ranges.begin(), Ranges.end());
10477 // If there is at least one outlinable range where LR is unavailable
10478 // somewhere, remember that.
10479 if (!LRAvailableEverywhere)
10481 return Ranges;
10482}
10483
10485AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10487 unsigned Flags) const {
10488 MachineInstr &MI = *MIT;
10489
10490 // Don't outline anything used for return address signing. The outlined
10491 // function will get signed later if needed
10492 switch (MI.getOpcode()) {
10493 case AArch64::PACM:
10494 case AArch64::PACIASP:
10495 case AArch64::PACIBSP:
10496 case AArch64::PACIASPPC:
10497 case AArch64::PACIBSPPC:
10498 case AArch64::AUTIASP:
10499 case AArch64::AUTIBSP:
10500 case AArch64::AUTIASPPCi:
10501 case AArch64::AUTIASPPCr:
10502 case AArch64::AUTIBSPPCi:
10503 case AArch64::AUTIBSPPCr:
10504 case AArch64::RETAA:
10505 case AArch64::RETAB:
10506 case AArch64::RETAASPPCi:
10507 case AArch64::RETAASPPCr:
10508 case AArch64::RETABSPPCi:
10509 case AArch64::RETABSPPCr:
10510 case AArch64::EMITBKEY:
10511 case AArch64::PAUTH_PROLOGUE:
10512 case AArch64::PAUTH_EPILOGUE:
10514 }
10515
10516 // We can only outline these if we will tail call the outlined function, or
10517 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10518 // in a tail call.
10519 //
10520 // FIXME: If the proper fixups for the offset are implemented, this should be
10521 // possible.
10522 if (MI.isCFIInstruction())
10524
10525 // Is this a terminator for a basic block?
10526 if (MI.isTerminator())
10527 // TargetInstrInfo::getOutliningType has already filtered out anything
10528 // that would break this, so we can allow it here.
10530
10531 // Make sure none of the operands are un-outlinable.
10532 for (const MachineOperand &MOP : MI.operands()) {
10533 // A check preventing CFI indices was here before, but only CFI
10534 // instructions should have those.
10535 assert(!MOP.isCFIIndex());
10536
10537 // If it uses LR or W30 explicitly, then don't touch it.
10538 if (MOP.isReg() && !MOP.isImplicit() &&
10539 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10541 }
10542
10543 // Special cases for instructions that can always be outlined, but will fail
10544 // the later tests. e.g., ADRPs, which are PC-relative, may use LR but can
10545 // always be outlined because they don't require a *specific* value in LR.
10546 if (MI.getOpcode() == AArch64::ADRP)
10548
10549 // If MI is a call we might be able to outline it. We don't want to outline
10550 // any calls that rely on the position of items on the stack. When we outline
10551 // something containing a call, we have to emit a save and restore of LR in
10552 // the outlined function. Currently, this always happens by saving LR to the
10553 // stack. Thus, if we outline, say, half the parameters for a function call
10554 // plus the call, then we'll break the callee's expectations for the layout
10555 // of the stack.
10556 //
10557 // FIXME: Allow calls to functions which construct a stack frame, as long
10558 // as they don't access arguments on the stack.
10559 // FIXME: Figure out some way to analyze functions defined in other modules.
10560 // We should be able to compute the memory usage based on the IR calling
10561 // convention, even if we can't see the definition.
10562 if (MI.isCall()) {
10563 // Get the function associated with the call. Look at each operand and find
10564 // the one that represents the callee and get its name.
10565 const Function *Callee = nullptr;
10566 for (const MachineOperand &MOP : MI.operands()) {
10567 if (MOP.isGlobal()) {
10568 Callee = dyn_cast<Function>(MOP.getGlobal());
10569 break;
10570 }
10571 }
10572
10573 // Never outline calls to mcount. There isn't any rule that would require
10574 // this, but the Linux kernel's "ftrace" feature depends on it.
10575 if (Callee && Callee->getName() == "\01_mcount")
10576 return outliner::InstrType::Illegal;
10577
10578 // If we don't know anything about the callee, assume it depends on the
10579 // stack layout of the caller. In that case, it's only legal to outline
10580 // as a tail-call. Explicitly list the call instructions we know about so we
10581 // don't get unexpected results with call pseudo-instructions.
10582 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10583 if (MI.getOpcode() == AArch64::BLR ||
10584 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10585 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10586
10587 if (!Callee)
10588 return UnknownCallOutlineType;
10589
10590 // We have a function we have information about. Check if it's something
10591 // we can safely outline.
10592 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10593
10594 // We don't know what's going on with the callee at all. Don't touch it.
10595 if (!CalleeMF)
10596 return UnknownCallOutlineType;
10597
10598 // Check if we know anything about the callee saves on the function. If we
10599 // don't, then don't touch it, since that implies that we haven't
10600 // computed anything about its stack frame yet.
10601 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10602 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10603 MFI.getNumObjects() > 0)
10604 return UnknownCallOutlineType;
10605
10606 // At this point, we can say that CalleeMF ought to not pass anything on the
10607 // stack. Therefore, we can outline it.
10608 return outliner::InstrType::Legal;
10609 }
10610
10611 // Don't touch the link register or W30.
10612 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10613 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10614 return outliner::InstrType::Illegal;
10615
10616 // Don't outline BTI instructions, because that will prevent the outlining
10617 // site from being indirectly callable.
10618 if (hasBTISemantics(MI))
10619 return outliner::InstrType::Illegal;
10620
10621 return outliner::InstrType::Legal;
10622}
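// Editorial sketch (not part of AArch64InstrInfo.cpp): the decision order of
// getOutliningTypeImpl above, restated as a small standalone classifier. All
// names and parameters below are hypothetical stand-ins for the MachineInstr
// queries used by the real code; this only illustrates the flow and omits
// details such as the mcount special case.
enum class OutlineKind { Legal, LegalTerminator, Illegal };
static OutlineKind classifyForOutlining(bool IsRetAddrSigning, bool IsCFI,
                                        bool IsTerminator, bool UsesLRExplicitly,
                                        bool IsADRP, bool IsCall,
                                        bool CalleeFrameKnownEmpty,
                                        bool TouchesW30, bool HasBTI) {
  if (IsRetAddrSigning)  return OutlineKind::Illegal;   // PAC*/AUT*/RETA* etc.
  if (IsCFI)             return OutlineKind::Legal;     // outlined only in tail calls
  if (IsTerminator)      return OutlineKind::Legal;     // already filtered upstream
  if (UsesLRExplicitly)  return OutlineKind::Illegal;   // explicit LR/W30 operand
  if (IsADRP)            return OutlineKind::Legal;     // PC-relative, no fixed LR value
  if (IsCall)
    return CalleeFrameKnownEmpty ? OutlineKind::Legal
                                 : OutlineKind::LegalTerminator; // tail-call only
  if (TouchesW30 || HasBTI)
    return OutlineKind::Illegal;
  return OutlineKind::Legal;
}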
10623
10624void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10625 for (MachineInstr &MI : MBB) {
10626 const MachineOperand *Base;
10627 TypeSize Width(0, false);
10628 int64_t Offset;
10629 bool OffsetIsScalable;
10630
10631 // Is this a load or store with an immediate offset with SP as the base?
10632 if (!MI.mayLoadOrStore() ||
10633 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10634 &RI) ||
10635 (Base->isReg() && Base->getReg() != AArch64::SP))
10636 continue;
10637
10638 // It is, so we have to fix it up.
10639 TypeSize Scale(0U, false);
10640 int64_t Dummy1, Dummy2;
10641
10642 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10643 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10644 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10645 assert(Scale != 0 && "Unexpected opcode!");
10646 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10647
10648 // We've pushed the return address to the stack, so add 16 to the offset.
10649 // This is safe, since we already checked if it would overflow when we
10650 // checked if this instruction was legal to outline.
10651 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10652 StackOffsetOperand.setImm(NewImm);
10653 }
10654}
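// Editorial sketch (not part of AArch64InstrInfo.cpp): the immediate rewrite
// done by fixupPostOutline above. The outlined body runs after LR has been
// pushed with "str x30, [sp, #-16]!", so every SP-relative byte offset grows by
// 16 and is then re-scaled into the instruction's immediate field. The helper
// below is hypothetical and only models the arithmetic.
static long long rebaseSPRelativeImm(long long OldByteOffset, long long Scale) {
  return (OldByteOffset + 16) / Scale; // new scaled immediate
}
// Example: a 64-bit load at byte offset 8 (scale 8) becomes (8 + 16) / 8 == 3,
// i.e. it is re-encoded as "ldr xN, [sp, #24]".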
10655
10656static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10657 const AArch64InstrInfo *TII,
10658 bool ShouldSignReturnAddr) {
10659 if (!ShouldSignReturnAddr)
10660 return;
10661
10662 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10663 .setMIFlag(MachineInstr::FrameSetup);
10664 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10665 TII->get(AArch64::PAUTH_EPILOGUE))
10666 .setMIFlag(MachineInstr::FrameDestroy);
10667}
10668
10669void AArch64InstrInfo::buildOutlinedFrame(
10670 MachineBasicBlock &MBB, MachineFunction &MF,
10671 const outliner::OutlinedFunction &OF) const {
10672
10673 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10674
10675 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10676 FI->setOutliningStyle("Tail Call");
10677 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10678 // For thunk outlining, rewrite the last instruction from a call to a
10679 // tail-call.
10680 MachineInstr *Call = &*--MBB.instr_end();
10681 unsigned TailOpcode;
10682 if (Call->getOpcode() == AArch64::BL) {
10683 TailOpcode = AArch64::TCRETURNdi;
10684 } else {
10685 assert(Call->getOpcode() == AArch64::BLR ||
10686 Call->getOpcode() == AArch64::BLRNoIP);
10687 TailOpcode = AArch64::TCRETURNriALL;
10688 }
10689 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10690 .add(Call->getOperand(0))
10691 .addImm(0);
10692 MBB.insert(MBB.end(), TC);
10693 Call->eraseFromParent();
10694
10695 FI->setOutliningStyle("Thunk");
10696 }
10697
10698 bool IsLeafFunction = true;
10699
10700 // Is there a call in the outlined range?
10701 auto IsNonTailCall = [](const MachineInstr &MI) {
10702 return MI.isCall() && !MI.isReturn();
10703 };
10704
10705 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10706 // Fix up the instructions in the range, since we're going to modify the
10707 // stack.
10708
10709 // Bugzilla ID: 46767
10710 // TODO: Check if fixing up twice is safe so we can outline these.
10711 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10712 "Can only fix up stack references once");
10713 fixupPostOutline(MBB);
10714
10715 IsLeafFunction = false;
10716
10717 // LR has to be a live in so that we can save it.
10718 if (!MBB.isLiveIn(AArch64::LR))
10719 MBB.addLiveIn(AArch64::LR);
10720
10721 MachineBasicBlock::iterator It = MBB.begin();
10722 MachineBasicBlock::iterator Et = MBB.end();
10723
10724 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10725 OF.FrameConstructionID == MachineOutlinerThunk)
10726 Et = std::prev(MBB.end());
10727
10728 // Insert a save before the outlined region
10729 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10730 .addReg(AArch64::SP, RegState::Define)
10731 .addReg(AArch64::LR)
10732 .addReg(AArch64::SP)
10733 .addImm(-16);
10734 It = MBB.insert(It, STRXpre);
10735
10736 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10737 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10738
10739 // Add a CFI saying the stack was moved 16 B down.
10740 CFIBuilder.buildDefCFAOffset(16);
10741
10742 // Add a CFI saying that the LR that we want to find is now 16 B higher
10743 // than before.
10744 CFIBuilder.buildOffset(AArch64::LR, -16);
10745 }
10746
10747 // Insert a restore before the terminator for the function.
10748 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10749 .addReg(AArch64::SP, RegState::Define)
10750 .addReg(AArch64::LR, RegState::Define)
10751 .addReg(AArch64::SP)
10752 .addImm(16);
10753 Et = MBB.insert(Et, LDRXpost);
10754 }
10755
10756 auto RASignCondition = FI->getSignReturnAddressCondition();
10757 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
10758 RASignCondition, !IsLeafFunction);
10759
10760 // If this is a tail call outlined function, then there's already a return.
10761 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10762 OF.FrameConstructionID == MachineOutlinerThunk) {
10763 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10764 return;
10765 }
10766
10767 // It's not a tail call, so we have to insert the return ourselves.
10768
10769 // LR has to be a live in so that we can return to it.
10770 if (!MBB.isLiveIn(AArch64::LR))
10771 MBB.addLiveIn(AArch64::LR);
10772
10773 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10774 .addReg(AArch64::LR);
10775 MBB.insert(MBB.end(), ret);
10776
10777 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10778
10779 FI->setOutliningStyle("Function");
10780
10781 // Did we have to modify the stack by saving the link register?
10782 if (OF.FrameConstructionID != MachineOutlinerDefault)
10783 return;
10784
10785 // We modified the stack.
10786 // Walk over the basic block and fix up all the stack accesses.
10787 fixupPostOutline(MBB);
10788}
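// Editorial sketch (not part of AArch64InstrInfo.cpp): the unwind arithmetic
// encoded by the two CFI directives emitted above for the default frame. After
// "str x30, [sp, #-16]!", the CFA is SP+16 and LR is saved at CFA-16. The check
// below is a hypothetical, LLVM-independent restatement of that relationship.
static bool outlinedFrameUnwindMathHolds(long long SPBeforeSpill) {
  long long SPAfterSpill = SPBeforeSpill - 16; // pre-decrement store of LR
  long long CFA = SPAfterSpill + 16;           // .cfi_def_cfa_offset 16
  long long LRSaveSlot = CFA - 16;             // .cfi_offset w30, -16
  return CFA == SPBeforeSpill && LRSaveSlot == SPAfterSpill;
}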
10789
10790MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10791 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10792 MachineFunction &MF, outliner::Candidate &C) const {
10793
10794 // Are we tail calling?
10795 if (C.CallConstructionID == MachineOutlinerTailCall) {
10796 // If yes, then we can just branch to the label.
10797 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10798 .addGlobalAddress(M.getNamedValue(MF.getName()))
10799 .addImm(0));
10800 return It;
10801 }
10802
10803 // Are we saving the link register?
10804 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10805 C.CallConstructionID == MachineOutlinerThunk) {
10806 // No, so just insert the call.
10807 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10808 .addGlobalAddress(M.getNamedValue(MF.getName())));
10809 return It;
10810 }
10811
10812 // We want to return the spot where we inserted the call.
10813 MachineBasicBlock::iterator CallPt;
10814
10815 // Instructions for saving and restoring LR around the call instruction we're
10816 // going to insert.
10817 MachineInstr *Save;
10818 MachineInstr *Restore;
10819 // Can we save to a register?
10820 if (C.CallConstructionID == MachineOutlinerRegSave) {
10821 // FIXME: This logic should be sunk into a target-specific interface so that
10822 // we don't have to recompute the register.
10823 Register Reg = findRegisterToSaveLRTo(C);
10824 assert(Reg && "No callee-saved register available?");
10825
10826 // LR has to be a live in so that we can save it.
10827 if (!MBB.isLiveIn(AArch64::LR))
10828 MBB.addLiveIn(AArch64::LR);
10829
10830 // Save and restore LR from Reg.
10831 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10832 .addReg(AArch64::XZR)
10833 .addReg(AArch64::LR)
10834 .addImm(0);
10835 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10836 .addReg(AArch64::XZR)
10837 .addReg(Reg)
10838 .addImm(0);
10839 } else {
10840 // We have the default case. Save and restore from SP.
10841 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10842 .addReg(AArch64::SP, RegState::Define)
10843 .addReg(AArch64::LR)
10844 .addReg(AArch64::SP)
10845 .addImm(-16);
10846 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10847 .addReg(AArch64::SP, RegState::Define)
10848 .addReg(AArch64::LR, RegState::Define)
10849 .addReg(AArch64::SP)
10850 .addImm(16);
10851 }
10852
10853 It = MBB.insert(It, Save);
10854 It++;
10855
10856 // Insert the call.
10857 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10858 .addGlobalAddress(M.getNamedValue(MF.getName())));
10859 CallPt = It;
10860 It++;
10861
10862 It = MBB.insert(It, Restore);
10863 return CallPt;
10864}
10865
10866bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10867 MachineFunction &MF) const {
10868 return MF.getFunction().hasMinSize();
10869}
10870
10871void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10872 MachineBasicBlock::iterator Iter,
10873 DebugLoc &DL,
10874 bool AllowSideEffects) const {
10875 const MachineFunction &MF = *MBB.getParent();
10876 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10877 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10878
10879 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10880 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10881 } else if (STI.isSVEorStreamingSVEAvailable()) {
10882 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10883 .addImm(0)
10884 .addImm(0);
10885 } else if (STI.isNeonAvailable()) {
10886 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10887 .addImm(0);
10888 } else {
10889 // This is a streaming-compatible function without SVE. We don't have full
10890 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10891 // So given `movi v..` would be illegal use `fmov d..` instead.
10892 assert(STI.hasNEON() && "Expected to have NEON.");
10893 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10894 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10895 }
10896}
10897
10898std::optional<DestSourcePair>
10899AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10900
10901 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10902 // and zero immediate operands used as an alias for mov instruction.
10903 if (((MI.getOpcode() == AArch64::ORRWrs &&
10904 MI.getOperand(1).getReg() == AArch64::WZR &&
10905 MI.getOperand(3).getImm() == 0x0) ||
10906 (MI.getOpcode() == AArch64::ORRWrr &&
10907 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10908 // Check that the w->w move is not a zero-extending w->x mov.
10909 (!MI.getOperand(0).getReg().isVirtual() ||
10910 MI.getOperand(0).getSubReg() == 0) &&
10911 (!MI.getOperand(0).getReg().isPhysical() ||
10912 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10913 /*TRI=*/nullptr) == -1))
10914 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10915
10916 if (MI.getOpcode() == AArch64::ORRXrs &&
10917 MI.getOperand(1).getReg() == AArch64::XZR &&
10918 MI.getOperand(3).getImm() == 0x0)
10919 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10920
10921 return std::nullopt;
10922}
10923
10924std::optional<DestSourcePair>
10925AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10926 if ((MI.getOpcode() == AArch64::ORRWrs &&
10927 MI.getOperand(1).getReg() == AArch64::WZR &&
10928 MI.getOperand(3).getImm() == 0x0) ||
10929 (MI.getOpcode() == AArch64::ORRWrr &&
10930 MI.getOperand(1).getReg() == AArch64::WZR))
10931 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10932 return std::nullopt;
10933}
10934
10935std::optional<RegImmPair>
10936AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10937 int Sign = 1;
10938 int64_t Offset = 0;
10939
10940 // TODO: Handle cases where Reg is a super- or sub-register of the
10941 // destination register.
10942 const MachineOperand &Op0 = MI.getOperand(0);
10943 if (!Op0.isReg() || Reg != Op0.getReg())
10944 return std::nullopt;
10945
10946 switch (MI.getOpcode()) {
10947 default:
10948 return std::nullopt;
10949 case AArch64::SUBWri:
10950 case AArch64::SUBXri:
10951 case AArch64::SUBSWri:
10952 case AArch64::SUBSXri:
10953 Sign *= -1;
10954 [[fallthrough]];
10955 case AArch64::ADDSWri:
10956 case AArch64::ADDSXri:
10957 case AArch64::ADDWri:
10958 case AArch64::ADDXri: {
10959 // TODO: Third operand can be global address (usually some string).
10960 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10961 !MI.getOperand(2).isImm())
10962 return std::nullopt;
10963 int Shift = MI.getOperand(3).getImm();
10964 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10965 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10966 }
10967 }
10968 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10969}
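// Editorial sketch (not part of AArch64InstrInfo.cpp): the immediate decoding
// performed by isAddImmediate above. SUB forms negate the constant, and the
// add/sub immediate encoding allows an optional LSL #12. Hypothetical helper:
static long long decodeAddSubImmOffset(long long Imm12, unsigned Shift,
                                       bool IsSub) {
  long long Offset = Imm12 << Shift; // Shift is either 0 or 12
  return IsSub ? -Offset : Offset;
}
// Example: "sub x0, x1, #3, lsl #12" describes x0 = x1 + (-12288), i.e.
// decodeAddSubImmOffset(3, 12, /*IsSub=*/true) == -12288.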
10970
10971/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10972/// the destination register then, if possible, describe the value in terms of
10973/// the source register.
10974static std::optional<ParamLoadedValue>
10976 const TargetInstrInfo *TII,
10977 const TargetRegisterInfo *TRI) {
10978 auto DestSrc = TII->isCopyLikeInstr(MI);
10979 if (!DestSrc)
10980 return std::nullopt;
10981
10982 Register DestReg = DestSrc->Destination->getReg();
10983 Register SrcReg = DestSrc->Source->getReg();
10984
10985 if (!DestReg.isValid() || !SrcReg.isValid())
10986 return std::nullopt;
10987
10988 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10989
10990 // If the described register is the destination, just return the source.
10991 if (DestReg == DescribedReg)
10992 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10993
10994 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10995 if (MI.getOpcode() == AArch64::ORRWrs &&
10996 TRI->isSuperRegister(DestReg, DescribedReg))
10997 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10998
10999 // We may need to describe the lower part of a ORRXrs move.
11000 if (MI.getOpcode() == AArch64::ORRXrs &&
11001 TRI->isSubRegister(DestReg, DescribedReg)) {
11002 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
11003 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
11004 }
11005
11006 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
11007 "Unhandled ORR[XW]rs copy case");
11008
11009 return std::nullopt;
11010}
11011
11012bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
11013 // Functions cannot be split to different sections on AArch64 if they have
11014 // a red zone. This is because relaxing a cross-section branch may require
11015 // incrementing the stack pointer to spill a register, which would overwrite
11016 // the red zone.
11017 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
11018 return false;
11019
11020 return true;
11021}
11022
11023bool AArch64InstrInfo::isMBBSafeToSplitToCold(
11024 const MachineBasicBlock &MBB) const {
11025 // Asm Goto blocks can contain conditional branches to goto labels, which can
11026 // get moved out of range of the branch instruction.
11027 auto isAsmGoto = [](const MachineInstr &MI) {
11028 return MI.getOpcode() == AArch64::INLINEASM_BR;
11029 };
11030 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
11031 return false;
11032
11033 // Because jump tables are label-relative instead of table-relative, they all
11034 // must be in the same section or relocation fixup handling will fail.
11035
11036 // Check if MBB is a jump table target
11037 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
11038 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
11039 return llvm::is_contained(JTE.MBBs, &MBB);
11040 };
11041 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
11042 return false;
11043
11044 // Check if MBB contains a jump table lookup
11045 for (const MachineInstr &MI : MBB) {
11046 switch (MI.getOpcode()) {
11047 case TargetOpcode::G_BRJT:
11048 case AArch64::JumpTableDest32:
11049 case AArch64::JumpTableDest16:
11050 case AArch64::JumpTableDest8:
11051 return false;
11052 default:
11053 continue;
11054 }
11055 }
11056
11057 // MBB isn't a special case, so it's safe to be split to the cold section.
11058 return true;
11059}
11060
11061std::optional<ParamLoadedValue>
11062AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11063 Register Reg) const {
11064 const MachineFunction *MF = MI.getMF();
11065 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11066 switch (MI.getOpcode()) {
11067 case AArch64::MOVZWi:
11068 case AArch64::MOVZXi: {
11069 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11070 // 64-bit parameters, so we need to consider super-registers.
11071 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11072 return std::nullopt;
11073
11074 if (!MI.getOperand(1).isImm())
11075 return std::nullopt;
11076 int64_t Immediate = MI.getOperand(1).getImm();
11077 int Shift = MI.getOperand(2).getImm();
11078 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11079 nullptr);
11080 }
11081 case AArch64::ORRWrs:
11082 case AArch64::ORRXrs:
11083 return describeORRLoadedValue(MI, Reg, this, TRI);
11084 }
11085
11086 return TargetInstrInfo::describeLoadedValue(MI, Reg);
11087}
11088
11089bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11090 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11091 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11092 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11093 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11094
11095 // Anyexts are nops.
11096 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11097 return true;
11098
11099 Register DefReg = ExtMI.getOperand(0).getReg();
11100 if (!MRI.hasOneNonDBGUse(DefReg))
11101 return false;
11102
11103 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11104 // addressing mode.
11105 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11106 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11107}
11108
11109uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11110 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11111}
11112
11113bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11114 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11115}
11116
11117bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11118 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11119}
11120
11121unsigned int
11122AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11123 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11124}
11125
11126bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11127 unsigned Scale) const {
11128 if (Offset && Scale)
11129 return false;
11130
11131 // Check Reg + Imm
11132 if (!Scale) {
11133 // 9-bit signed offset
11134 if (isInt<9>(Offset))
11135 return true;
11136
11137 // 12-bit unsigned offset
11138 unsigned Shift = Log2_64(NumBytes);
11139 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11140 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11141 (Offset >> Shift) << Shift == Offset)
11142 return true;
11143 return false;
11144 }
11145
11146 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11147 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11148}
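// Editorial sketch (not part of AArch64InstrInfo.cpp): the register+immediate
// half of isLegalAddressingMode above, restated standalone. An offset is legal
// either as a 9-bit signed byte offset (unscaled forms) or as a positive
// multiple of the power-of-two access size whose scaled value fits the unsigned
// 12-bit field. This mirror is an assumption-laden sketch, not LLVM API.
static bool isLegalRegImmOffset(unsigned NumBytes, long long Offset) {
  if (Offset >= -256 && Offset <= 255)           // isInt<9>(Offset)
    return true;
  return NumBytes != 0 && Offset > 0 &&
         Offset % NumBytes == 0 &&                // multiple of the access size
         (Offset / NumBytes) <= 4095;             // fits the scaled 12-bit field
}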
11149
11150unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
11151 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11152 return AArch64::BLRNoIP;
11153 else
11154 return AArch64::BLR;
11155}
11156
11159 Register TargetReg, bool FrameSetup) const {
11160 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11161
11162 MachineBasicBlock &MBB = *MBBI->getParent();
11163 MachineFunction &MF = *MBB.getParent();
11164 const AArch64InstrInfo *TII =
11165 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11166 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11167 DebugLoc DL = MBB.findDebugLoc(MBBI);
11168
11169 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11170 MachineBasicBlock *LoopTestMBB =
11171 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11172 MF.insert(MBBInsertPoint, LoopTestMBB);
11173 MachineBasicBlock *LoopBodyMBB =
11174 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11175 MF.insert(MBBInsertPoint, LoopBodyMBB);
11176 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11177 MF.insert(MBBInsertPoint, ExitMBB);
11178 MachineInstr::MIFlag Flags =
11179 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
11180
11181 // LoopTest:
11182 // SUB SP, SP, #ProbeSize
11183 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11184 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11185
11186 // CMP SP, TargetReg
11187 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11188 AArch64::XZR)
11189 .addReg(AArch64::SP)
11190 .addReg(TargetReg)
11191 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
11192 .setMIFlags(Flags);
11193
11194 // B.<Cond> LoopExit
11195 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
11196 .addImm(AArch64CC::LE)
11197 .addMBB(ExitMBB)
11198 .setMIFlags(Flags);
11199
11200 // STR XZR, [SP]
11201 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
11202 .addReg(AArch64::XZR)
11203 .addReg(AArch64::SP)
11204 .addImm(0)
11205 .setMIFlags(Flags);
11206
11207 // B loop
11208 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11209 .addMBB(LoopTestMBB)
11210 .setMIFlags(Flags);
11211
11212 // LoopExit:
11213 // MOV SP, TargetReg
11214 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11215 .addReg(TargetReg)
11216 .addImm(0)
11217 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
11218 .setMIFlags(Flags);
11219
11220 // LDR XZR, [SP]
11221 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11222 .addReg(AArch64::XZR, RegState::Define)
11223 .addReg(AArch64::SP)
11224 .addImm(0)
11225 .setMIFlags(Flags);
11226
11227 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
11228 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
11229
11230 LoopTestMBB->addSuccessor(ExitMBB);
11231 LoopTestMBB->addSuccessor(LoopBodyMBB);
11232 LoopBodyMBB->addSuccessor(LoopTestMBB);
11233 MBB.addSuccessor(LoopTestMBB);
11234
11235 // Update liveins.
11236 if (MF.getRegInfo().reservedRegsFrozen())
11237 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11238
11239 return ExitMBB->begin();
11240}
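// Editorial sketch (not part of AArch64InstrInfo.cpp): a C-level model of the
// probing loop that probedStackAlloc emits above. Each step moves SP down by
// ProbeSize and touches the new page so the OS can grow the stack, then SP is
// parked exactly at TargetReg and the final page is touched as well. The Touch
// callback is a hypothetical stand-in for the STR/LDR of XZR.
static void modelProbedStackAlloc(long long &SP, long long TargetReg,
                                  long long ProbeSize,
                                  void (*Touch)(long long Addr)) {
  for (;;) {
    SP -= ProbeSize;        // SUB SP, SP, #ProbeSize
    if (SP <= TargetReg)    // CMP SP, TargetReg; exit once the target is reached
      break;
    Touch(SP);              // STR XZR, [SP]
  }
  SP = TargetReg;           // MOV SP, TargetReg
  Touch(SP);                // LDR XZR, [SP]
}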
11241
11242namespace {
11243class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
11244 MachineFunction *MF;
11245 const TargetInstrInfo *TII;
11246 const TargetRegisterInfo *TRI;
11247 MachineRegisterInfo &MRI;
11248
11249 /// The block of the loop
11250 MachineBasicBlock *LoopBB;
11251 /// The conditional branch of the loop
11252 MachineInstr *CondBranch;
11253 /// The compare instruction for loop control
11254 MachineInstr *Comp;
11255 /// The number of the operand of the loop counter value in Comp
11256 unsigned CompCounterOprNum;
11257 /// The instruction that updates the loop counter value
11258 MachineInstr *Update;
11259 /// The number of the operand of the loop counter value in Update
11260 unsigned UpdateCounterOprNum;
11261 /// The initial value of the loop counter
11262 Register Init;
11263 /// True iff Update is a predecessor of Comp
11264 bool IsUpdatePriorComp;
11265
11266 /// The normalized condition used by createTripCountGreaterCondition()
11267 SmallVector<MachineOperand, 4> Cond;
11268
11269public:
11270 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11271 MachineInstr *Comp, unsigned CompCounterOprNum,
11272 MachineInstr *Update, unsigned UpdateCounterOprNum,
11273 Register Init, bool IsUpdatePriorComp,
11274 const SmallVectorImpl<MachineOperand> &Cond)
11275 : MF(Comp->getParent()->getParent()),
11276 TII(MF->getSubtarget().getInstrInfo()),
11277 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11278 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11279 CompCounterOprNum(CompCounterOprNum), Update(Update),
11280 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11281 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11282
11283 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11284 // Make the instructions for loop control be placed in stage 0.
11285 // The predecessors of Comp are considered by the caller.
11286 return MI == Comp;
11287 }
11288
11289 std::optional<bool> createTripCountGreaterCondition(
11290 int TC, MachineBasicBlock &MBB,
11291 SmallVectorImpl<MachineOperand> &CondParam) override {
11292 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11293 // Cond is normalized for such use.
11294 // The predecessors of the branch are assumed to have already been inserted.
11295 CondParam = Cond;
11296 return {};
11297 }
11298
11299 void createRemainingIterationsGreaterCondition(
11300 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11301 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11302
11303 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11304
11305 void adjustTripCount(int TripCountAdjust) override {}
11306
11307 bool isMVEExpanderSupported() override { return true; }
11308};
11309} // namespace
11310
11311/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11312/// is replaced by ReplaceReg. The output register is newly created.
11313/// The other operands are unchanged from MI.
11314static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11315 Register ReplaceReg, MachineBasicBlock &MBB,
11316 MachineBasicBlock::iterator InsertTo) {
11317 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11318 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11319 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11320 Register Result = 0;
11321 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11322 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11323 Result = MRI.createVirtualRegister(
11324 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11325 NewMI->getOperand(I).setReg(Result);
11326 } else if (I == ReplaceOprNum) {
11327 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11328 NewMI->getOperand(I).setReg(ReplaceReg);
11329 }
11330 }
11331 MBB.insert(InsertTo, NewMI);
11332 return Result;
11333}
11334
11335void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11336 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11337 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11338 // Create and accumulate conditions for next TC iterations.
11339 // Example:
11340 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11341 // # iteration of the kernel
11342 //
11343 // # insert the following instructions
11344 // cond = CSINCXr 0, 0, C, implicit $nzcv
11345 // counter = ADDXri counter, 1 # clone from this->Update
11346 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11347 // cond = CSINCXr cond, cond, C, implicit $nzcv
11348 // ... (repeat TC times)
11349 // SUBSXri cond, 0, implicit-def $nzcv
11350
11351 assert(CondBranch->getOpcode() == AArch64::Bcc);
11352 // CondCode to exit the loop
11353 AArch64CC::CondCode CC =
11354 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11355 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11356 CC = AArch64CC::getInvertedCondCode(CC);
11357
11358 // Accumulate conditions to exit the loop
11359 Register AccCond = AArch64::XZR;
11360
11361 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11362 auto AccumulateCond = [&](Register CurCond,
11363 AArch64CC::CondCode CC) {
11364 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11365 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11366 .addReg(NewCond, RegState::Define)
11367 .addReg(CurCond)
11368 .addReg(CurCond)
11369 .addImm(AArch64CC::getInvertedCondCode(CC));
11370 return NewCond;
11371 };
11372
11373 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11374 // Update and Comp for I==0 already exist in MBB
11375 // (MBB is an unrolled kernel)
11376 Register Counter;
11377 for (int I = 0; I <= TC; ++I) {
11378 Register NextCounter;
11379 if (I != 0)
11380 NextCounter =
11381 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11382
11383 AccCond = AccumulateCond(AccCond, CC);
11384
11385 if (I != TC) {
11386 if (I == 0) {
11387 if (Update != Comp && IsUpdatePriorComp) {
11388 Counter =
11389 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11390 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11391 MBB.end());
11392 } else {
11393 // can use already calculated value
11394 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11395 }
11396 } else if (Update != Comp) {
11397 NextCounter =
11398 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11399 }
11400 }
11401 Counter = NextCounter;
11402 }
11403 } else {
11404 Register Counter;
11405 if (LastStage0Insts.empty()) {
11406 // use initial counter value (testing if the trip count is sufficient to
11407 // be executed by pipelined code)
11408 Counter = Init;
11409 if (IsUpdatePriorComp)
11410 Counter =
11411 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11412 } else {
11413 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11414 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11415 }
11416
11417 for (int I = 0; I <= TC; ++I) {
11418 Register NextCounter;
11419 NextCounter =
11420 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11421 AccCond = AccumulateCond(AccCond, CC);
11422 if (I != TC && Update != Comp)
11423 NextCounter =
11424 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11425 Counter = NextCounter;
11426 }
11427 }
11428
11429 // If AccCond == 0, the remainder is greater than TC.
11430 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11431 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11432 .addReg(AccCond)
11433 .addImm(0)
11434 .addImm(0);
11435 Cond.clear();
11437}
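// Editorial sketch (not part of AArch64InstrInfo.cpp): a plain-integer model of
// the accumulation built above. Each cloned compare plus CSINC bumps an
// accumulator whenever the loop's exit condition would hold at that check
// point; if the accumulator is still zero after TC+1 checks, more than TC
// iterations remain. ExitCondHolds is a hypothetical stand-in for the cloned
// Comp/CC pair, and the exact update ordering is simplified.
static bool modelRemainingGreaterThan(long long Counter, long long Step, int TC,
                                      bool (*ExitCondHolds)(long long Counter)) {
  int AccCond = 0;                 // starts at zero (XZR)
  for (int I = 0; I <= TC; ++I) {
    if (ExitCondHolds(Counter))    // cloned Comp; CSINC increments on CC
      ++AccCond;
    Counter += Step;               // cloned Update instruction
  }
  return AccCond == 0;             // SUBS AccCond, #0: zero <=> remainder > TC
}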
11438
11439static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11440 Register &RegMBB, Register &RegOther) {
11441 assert(Phi.getNumOperands() == 5);
11442 if (Phi.getOperand(2).getMBB() == MBB) {
11443 RegMBB = Phi.getOperand(1).getReg();
11444 RegOther = Phi.getOperand(3).getReg();
11445 } else {
11446 assert(Phi.getOperand(4).getMBB() == MBB);
11447 RegMBB = Phi.getOperand(3).getReg();
11448 RegOther = Phi.getOperand(1).getReg();
11449 }
11450}
11451
11452static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11453 if (!Reg.isVirtual())
11454 return false;
11455 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11456 return MRI.getVRegDef(Reg)->getParent() != BB;
11457}
11458
11459/// If Reg is an induction variable, return true and set some parameters
11460static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11461 MachineInstr *&UpdateInst,
11462 unsigned &UpdateCounterOprNum, Register &InitReg,
11463 bool &IsUpdatePriorComp) {
11464 // Example:
11465 //
11466 // Preheader:
11467 // InitReg = ...
11468 // LoopBB:
11469 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11470 // Reg = COPY Reg0 ; COPY is ignored.
11471 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11472 // ; Reg is the value calculated in the previous
11473 // ; iteration, so IsUpdatePriorComp == false.
11474
11475 if (LoopBB->pred_size() != 2)
11476 return false;
11477 if (!Reg.isVirtual())
11478 return false;
11479 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11480 UpdateInst = nullptr;
11481 UpdateCounterOprNum = 0;
11482 InitReg = 0;
11483 IsUpdatePriorComp = true;
11484 Register CurReg = Reg;
11485 while (true) {
11486 MachineInstr *Def = MRI.getVRegDef(CurReg);
11487 if (Def->getParent() != LoopBB)
11488 return false;
11489 if (Def->isCopy()) {
11490 // Ignore copy instructions unless they contain subregisters
11491 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11492 return false;
11493 CurReg = Def->getOperand(1).getReg();
11494 } else if (Def->isPHI()) {
11495 if (InitReg != 0)
11496 return false;
11497 if (!UpdateInst)
11498 IsUpdatePriorComp = false;
11499 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11500 } else {
11501 if (UpdateInst)
11502 return false;
11503 switch (Def->getOpcode()) {
11504 case AArch64::ADDSXri:
11505 case AArch64::ADDSWri:
11506 case AArch64::SUBSXri:
11507 case AArch64::SUBSWri:
11508 case AArch64::ADDXri:
11509 case AArch64::ADDWri:
11510 case AArch64::SUBXri:
11511 case AArch64::SUBWri:
11512 UpdateInst = Def;
11513 UpdateCounterOprNum = 1;
11514 break;
11515 case AArch64::ADDSXrr:
11516 case AArch64::ADDSWrr:
11517 case AArch64::SUBSXrr:
11518 case AArch64::SUBSWrr:
11519 case AArch64::ADDXrr:
11520 case AArch64::ADDWrr:
11521 case AArch64::SUBXrr:
11522 case AArch64::SUBWrr:
11523 UpdateInst = Def;
11524 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11525 UpdateCounterOprNum = 1;
11526 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11527 UpdateCounterOprNum = 2;
11528 else
11529 return false;
11530 break;
11531 default:
11532 return false;
11533 }
11534 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11535 }
11536
11537 if (!CurReg.isVirtual())
11538 return false;
11539 if (Reg == CurReg)
11540 break;
11541 }
11542
11543 if (!UpdateInst)
11544 return false;
11545
11546 return true;
11547}
11548
11549std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11550AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11551 // Accept loops that meet the following conditions
11552 // * The conditional branch is BCC
11553 // * The compare instruction is ADDS/SUBS/WHILEXX
11554 // * One operand of the compare is an induction variable and the other is a
11555 // loop invariant value
11556 // * The induction variable is incremented/decremented by a single instruction
11557 // * Does not contain CALL or instructions which have unmodeled side effects
11558
11559 for (MachineInstr &MI : *LoopBB)
11560 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11561 // This instruction may use NZCV, which interferes with the instruction to
11562 // be inserted for loop control.
11563 return nullptr;
11564
11565 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11566 SmallVector<MachineOperand, 4> Cond;
11567 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11568 return nullptr;
11569
11570 // Infinite loops are not supported
11571 if (TBB == LoopBB && FBB == LoopBB)
11572 return nullptr;
11573
11574 // Must be conditional branch
11575 if (TBB != LoopBB && FBB == nullptr)
11576 return nullptr;
11577
11578 assert((TBB == LoopBB || FBB == LoopBB) &&
11579 "The Loop must be a single-basic-block loop");
11580
11581 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11582 const TargetRegisterInfo &TRI = getRegisterInfo();
11583
11584 if (CondBranch->getOpcode() != AArch64::Bcc)
11585 return nullptr;
11586
11587 // Normalization for createTripCountGreaterCondition()
11588 if (TBB == LoopBB)
11589 reverseBranchCondition(Cond);
11590
11591 MachineInstr *Comp = nullptr;
11592 unsigned CompCounterOprNum = 0;
11593 for (MachineInstr &MI : reverse(*LoopBB)) {
11594 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11595 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11596 // operands is a loop invariant value
11597
11598 switch (MI.getOpcode()) {
11599 case AArch64::SUBSXri:
11600 case AArch64::SUBSWri:
11601 case AArch64::ADDSXri:
11602 case AArch64::ADDSWri:
11603 Comp = &MI;
11604 CompCounterOprNum = 1;
11605 break;
11606 case AArch64::ADDSWrr:
11607 case AArch64::ADDSXrr:
11608 case AArch64::SUBSWrr:
11609 case AArch64::SUBSXrr:
11610 Comp = &MI;
11611 break;
11612 default:
11613 if (isWhileOpcode(MI.getOpcode())) {
11614 Comp = &MI;
11615 break;
11616 }
11617 return nullptr;
11618 }
11619
11620 if (CompCounterOprNum == 0) {
11621 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11622 CompCounterOprNum = 2;
11623 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11624 CompCounterOprNum = 1;
11625 else
11626 return nullptr;
11627 }
11628 break;
11629 }
11630 }
11631 if (!Comp)
11632 return nullptr;
11633
11634 MachineInstr *Update = nullptr;
11635 Register Init;
11636 bool IsUpdatePriorComp;
11637 unsigned UpdateCounterOprNum;
11638 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11639 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11640 return nullptr;
11641
11642 return std::make_unique<AArch64PipelinerLoopInfo>(
11643 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11644 Init, IsUpdatePriorComp, Cond);
11645}
11646
11647/// verifyInstruction - Perform target specific instruction verification.
11648bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11649 StringRef &ErrInfo) const {
11650 // Verify that immediate offsets on load/store instructions are within range.
11651 // Stack objects with an FI operand are excluded as they can be fixed up
11652 // during PEI.
11653 TypeSize Scale(0U, false), Width(0U, false);
11654 int64_t MinOffset, MaxOffset;
11655 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11656 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11657 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11658 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11659 if (Imm < MinOffset || Imm > MaxOffset) {
11660 ErrInfo = "Unexpected immediate on load/store instruction";
11661 return false;
11662 }
11663 }
11664 }
11665
11666 const MCInstrDesc &MCID = MI.getDesc();
11667 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11668 const MachineOperand &MO = MI.getOperand(Op);
11669 switch (MCID.operands()[Op].OperandType) {
11670 case AArch64::OPERAND_IMPLICIT_IMM_0:
11671 if (!MO.isImm() || MO.getImm() != 0) {
11672 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11673 return false;
11674 }
11675 break;
11676 case AArch64::OPERAND_SHIFT_MSL:
11677 if (!MO.isImm() ||
11678 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11679 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11680 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11681 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11682 return false;
11683 }
11684 break;
11685 default:
11686 break;
11687 }
11688 }
11689 return true;
11690}
11691
11692#define GET_INSTRINFO_HELPERS
11693#define GET_INSTRMAP_INFO
11694#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
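A hedged sketch of how a branch-relaxation style caller might use this hook; TII is assumed to be a pointer to the AArch64InstrInfo instance and the byte distance is illustrative.
    // Sketch: decide whether a Bcc can reach its target directly.
    int64_t BrOffset = 1 << 21; // hypothetical byte distance to the target
    if (!TII->isBranchOffsetInRange(AArch64::Bcc, BrOffset)) {
      // The branch must be relaxed (e.g. inverted around an unconditional
      // branch) because Bcc only encodes a +/-1 MiB displacement.
    }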
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
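A sketch of decomposing a memory access with this helper, assuming MI, TII (the AArch64InstrInfo instance) and TRI are in scope.
    // Sketch: split a load/store into base operand, offset and access width.
    const MachineOperand *BaseOp = nullptr;
    int64_t Offset = 0;
    bool OffsetIsScalable = false;
    TypeSize Width = TypeSize::getFixed(0);
    if (TII->getMemOperandWithOffsetWidth(MI, BaseOp, Offset, OffsetIsScalable,
                                          Width, TRI)) {
      // For an SVE access the reported offset is scaled by vscale.
    }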
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
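A sketch of the usual analyzeBranch / removeBranch / insertBranch cycle, assuming TII, MBB and DL are in scope.
    // Sketch: analyze the terminators of MBB and, on success, rewrite them.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
      // A false return means TBB/FBB/Cond now describe the block's control
      // flow and can be fed back into removeBranch/insertBranch.
      TII->removeBranch(MBB);
      TII->insertBranch(MBB, TBB, FBB, Cond, DL);
    }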
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:123
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:576
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:618
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:591
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:688
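A sketch of the common prologue pattern that pairs these CFI factory functions with MachineFunction::addFrameInst; MF, MBB, MBBI, DL, TII and StackSize are assumed to be in scope.
    // Sketch: record that the CFA is now SP + StackSize and emit the directive.
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, StackSize));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);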
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
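A sketch of chaining these builder methods to form a stack-slot store, assuming MBB, MBBI, DL, TII, SrcReg, isKill, FI and MMO are in scope.
    // Sketch: build a scaled 64-bit store to a stack slot with the builder API.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addReg(SrcReg, getKillRegState(isKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);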
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
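A small illustrative sketch of combining the fixed and scalable components; the byte counts are arbitrary.
    // Sketch: 32 bytes of fixed stack plus one 16-byte-per-vscale SVE slot.
    StackOffset Off = StackOffset::getFixed(32) + StackOffset::getScalable(16);
    int64_t FixedBytes = Off.getFixed();       // 32
    int64_t ScalableBytes = Off.getScalable(); // 16, multiplied by vscale at run time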
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
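A sketch of round-tripping these AArch64_AM helpers; the immediate value is an illustrative bitmask that happens to be encodable as a logical immediate.
    // Sketch: round-trip a logical immediate and build/decode a shifter operand.
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00ff00ff00ff00ffULL, 64);
    uint64_t Val = AArch64_AM::decodeLogicalImmediate(Enc, 64); // original value
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
    unsigned Amount = AArch64_AM::getShiftValue(Shifter);       // 12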
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
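A hedged sketch of querying the expander for a materialization sequence; the 64-bit constant is illustrative.
    // Sketch: ask which MOVZ/MOVK/ORR sequence materializes a constant.
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(0x0000ffff0000ffffULL, 64, Insn);
    // Each ImmInsnModel entry names an opcode and its immediate fields.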
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
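A minimal BuildMI sketch, assuming MBB, InsertPt, DL, TII, DestReg and SrcReg are in scope.
    // Sketch: materialize DestReg = SrcReg + 16 with an unshifted 12-bit immediate.
    BuildMI(MBB, InsertPt, DL, TII->get(AArch64::ADDXri), DestReg)
        .addReg(SrcReg)
        .addImm(16)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));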
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2503
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
constexpr bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
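A sketch of the typical prologue call, assuming MBB, MBBI, DL and TII are in scope.
    // Sketch: adjust SP downwards by 48 bytes, tagging the result as FrameSetup.
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-48), TII, MachineInstr::FrameSetup);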
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1748
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2141
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
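A hedged sketch of frame-index elimination using this helper; MI, FrameRegIdx, ObjectOffset and TII are assumed to be in scope, and the comments describe the interface as used here rather than a definitive contract.
    // Sketch: try to fold a resolved frame offset directly into MI.
    StackOffset Remaining = StackOffset::getFixed(ObjectOffset);
    if (rewriteAArch64FrameIndex(MI, FrameRegIdx, AArch64::SP, Remaining, TII)) {
      // The frame index operand was replaced and the offset fully absorbed.
    } else {
      // Part of the offset is left in Remaining and must be materialized
      // separately, e.g. via emitFrameOffset into a scratch register.
    }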
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1918
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
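A short sketch of the typical guard in a flags-related peephole, assuming DefMI, UseMI and TRI are in scope.
    // Sketch: bail out of the transformation if NZCV is read or clobbered
    // anywhere between the defining and the using instruction.
    if (isNZCVTouchedInInstructionRange(DefMI, UseMI, TRI))
      return false;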
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.