LLVM 23.0.0git
AArch64InstrInfo.cpp
Go to the documentation of this file.
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/GlobalValue.h"
46#include "llvm/IR/Module.h"
47#include "llvm/MC/MCAsmInfo.h"
48#include "llvm/MC/MCInst.h"
50#include "llvm/MC/MCInstrDesc.h"
55#include "llvm/Support/LEB128.h"
59#include <cassert>
60#include <cstdint>
61#include <iterator>
62#include <utility>
63
64using namespace llvm;
65
66#define GET_INSTRINFO_CTOR_DTOR
67#include "AArch64GenInstrInfo.inc"
68
69#define DEBUG_TYPE "AArch64InstrInfo"
70
// Statistics counters declared through the STATISTIC macro. All of them count
// instructions produced while expanding COPY.
71STATISTIC(NumCopyInstrs, "Number of COPY instructions expanded");
72STATISTIC(NumZCRegMoveInstrsGPR, "Number of zero-cycle GPR register move "
73 "instructions expanded from canonical COPY");
74STATISTIC(NumZCRegMoveInstrsFPR, "Number of zero-cycle FPR register move "
75 "instructions expanded from canonical COPY");
76STATISTIC(NumZCZeroingInstrsGPR, "Number of zero-cycle GPR zeroing "
77 "instructions expanded from canonical COPY");
78// NumZCZeroingInstrsFPR is counted at AArch64AsmPrinter
79
// Hidden command-line settings (cl::Hidden). Each one carries a default via
// cl::init; the "*-offset-bits" settings are described as debugging aids that
// restrict the reach of one family of branch instructions.
// NOTE(review): the declarator line in front of each setting below (listing
// lines 80, 84, 88, 92, 96 and 100) is missing from this extract, so the
// declared type, and for three of the settings the variable name, cannot be
// read here.
81 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
82 cl::desc("Restrict range of CB instructions (DEBUG)"));
83
85 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
86 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
87
89 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
90 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
91
93 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
94 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
95
97 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
98 cl::desc("Restrict range of B instructions (DEBUG)"));
99
// Search window used by the machine-combiner gather pattern optimization
// (default 2048).
101 "aarch64-search-limit", cl::Hidden, cl::init(2048),
102 cl::desc("Restrict range of instructions to search for the "
103 "machine-combiner gather pattern optimization"));
104
// NOTE(review): the line that opens this constructor definition (listing line
// 105) is missing from this extract; only the member-initialiser list is
// visible.
// The generated base class receives STI, RI and the ADJCALLSTACKDOWN,
// ADJCALLSTACKUP and CATCHRET opcodes. RI is built from the subtarget's target
// triple and hardware mode, and Subtarget is bound to STI.
106 : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
107 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
108 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
109
110/// Return the maximum number of bytes of code the specified instruction may be
111/// after LFI rewriting. If the instruction is not rewritten, std::nullopt is
112/// returned (use default sizing).
113///
114/// NOTE: the size estimates here must be kept in sync with the rewrites in
115/// AArch64MCLFIRewriter.cpp. Sizes may be overestimates of the rewritten
116/// instruction sequences.
117static std::optional<unsigned> getLFIInstSizeInBytes(const MachineInstr &MI) {
118 switch (MI.getOpcode()) {
119 case AArch64::SVC:
120 // SVC expands to 4 instructions.
121 return 16;
122 case AArch64::BR:
123 case AArch64::BLR:
124 // Indirect branches/calls expand to 2 instructions (guard + br/blr).
125 return 8;
126 case AArch64::RET:
127 // RET through LR is not rewritten, but RET through another register
128 // expands to 2 instructions (guard + ret).
129 if (MI.getOperand(0).getReg() != AArch64::LR)
130 return 8;
131 return 4;
132 default:
133 break;
134 }
135
136 // Instructions that explicitly modify LR expand to 2 instructions.
137 for (const MachineOperand &MO : MI.explicit_operands())
138 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::LR)
139 return 8;
140
141 // Default case: instructions that don't cause expansion.
142 // - TP accesses in LFI are a single load/store, so no expansion.
143 // - All remaining instructions are not rewritten.
144 return std::nullopt;
145}
146
147/// GetInstSize - Return the number of bytes of code the specified
148/// instruction may be. This returns the maximum number of bytes.
/// NOTE(review): the signature line of this function (listing line 149) is
/// missing from this extract. The visible body works on a MachineInstr named
/// MI and returns a byte count.
150 const MachineBasicBlock &MBB = *MI.getParent();
151 const MachineFunction *MF = MBB.getParent();
152 const Function &F = MF->getFunction();
153 const MCAsmInfo &MAI = MF->getTarget().getMCAsmInfo();
154
    // Inline assembly is sized from its asm string (operand 0), before any
    // other rule is considered.
155 {
156 auto Op = MI.getOpcode();
157 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
158 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
159 }
160
161 // Meta-instructions emit no code.
162 if (MI.isMetaInstruction())
163 return 0;
164
165 // FIXME: We currently only handle pseudoinstructions that don't get expanded
166 // before the assembly printer.
167 unsigned NumBytes = 0;
168 const MCInstrDesc &Desc = MI.getDesc();
169
170 // LFI rewriter expansions that supersede normal sizing.
171 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
172 if (STI.isLFI())
173 if (auto Size = getLFIInstSizeInBytes(MI))
174 return *Size;
175
    // Tail-call returns: start from the described size (4 if none is recorded)
    // and, when the function signs its return address, add the size of the
    // LR-authentication check sequence selected for this function.
176 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
177 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
178
179 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
180 if (!MFI->shouldSignReturnAddress(*MF))
181 return NumBytes;
182
183 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
184 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
185 return NumBytes;
186 }
187
188 // Size should be preferably set in
189 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
190 // Specific cases handle instructions of variable sizes
191 switch (Desc.getOpcode()) {
192 default:
193 if (Desc.getSize())
194 return Desc.getSize();
195
196 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
197 // with fixed constant size but not specified in .td file) is a normal
198 // 4-byte insn.
199 NumBytes = 4;
200 break;
201 case TargetOpcode::STACKMAP:
202 // The upper bound for a stackmap intrinsic is the full length of its shadow
203 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
204 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
205 break;
206 case TargetOpcode::PATCHPOINT:
207 // The size of the patchpoint intrinsic is the number of bytes requested
208 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
209 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
210 break;
211 case TargetOpcode::STATEPOINT:
212 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
213 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
214 // No patch bytes means a normal call inst is emitted
215 if (NumBytes == 0)
216 NumBytes = 4;
217 break;
218 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
219 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
220 // instructions are expanded to the specified number of NOPs. Otherwise,
221 // they are expanded to 36-byte XRay sleds.
      // The fallback value 9 below yields exactly those 36 bytes (9 * 4).
222 NumBytes =
223 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
224 break;
225 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
226 case TargetOpcode::PATCHABLE_TAIL_CALL:
227 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
228 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
229 NumBytes = 36;
230 break;
231 case TargetOpcode::PATCHABLE_EVENT_CALL:
232 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
233 NumBytes = 24;
234 break;
235
      // SPACE: the byte count is carried as an immediate in operand 1.
236 case AArch64::SPACE:
237 NumBytes = MI.getOperand(1).getImm();
238 break;
239 case AArch64::MOVaddr:
240 case AArch64::MOVaddrJT:
241 case AArch64::MOVaddrCP:
242 case AArch64::MOVaddrBA:
243 case AArch64::MOVaddrTLS:
244 case AArch64::MOVaddrEXT: {
245 // Use the same logic as the pseudo expansion to count instructions.
      // NOTE(review): listing lines 246-247 are missing from this extract;
      // presumably they declare `Insn` and open the call whose trailing
      // arguments follow.
248 MI.getOperand(1).getTargetFlags(),
249 Subtarget.isTargetMachO(), Insn);
250 NumBytes = Insn.size() * 4;
251 break;
252 }
253
254 case AArch64::MOVi32imm:
255 case AArch64::MOVi64imm: {
256 // Use the same logic as the pseudo expansion to count instructions.
257 unsigned BitSize = Desc.getOpcode() == AArch64::MOVi32imm ? 32 : 64;
      // NOTE(review): listing line 258 (presumably the declaration of `Insn`)
      // is missing from this extract.
259 AArch64_IMM::expandMOVImm(MI.getOperand(1).getImm(), BitSize, Insn);
260 NumBytes = Insn.size() * 4;
261 break;
262 }
263
264 case TargetOpcode::BUNDLE:
265 NumBytes = getInstBundleSize(MI);
266 break;
267 }
268
269 return NumBytes;
270}
271
// NOTE(review): the signature of this helper (listing lines 272-273) is
// missing from this extract. Other code in this file calls it as
// parseCondBranch(LastInst, TBB, Cond).
//
// Decodes the conditional branch LastInst into its destination block (Target)
// and appends a description of the condition to Cond. Layouts produced:
//   Bcc                   { operand 0 }
//   CB[N]Z{W,X}           { -1, Opcode, operand 0 }
//   TB[N]Z{W,X}           { -1, Opcode, operand 0, operand 1 }
//   CB{W,X}P{ri,rr}       { -1, Opcode, operand 0, operand 1, operand 2 }
//   CB{B,H}AssertExt      { -1, Opcode, Cond, Op0, Op1, Ext0, Ext1 }
// A leading -1 marks a folded compare-and-branch, as opposed to a plain Bcc
// whose first entry is its own operand 0.
274 // Block ends with fall-through condbranch.
275 switch (LastInst->getOpcode()) {
276 default:
277 llvm_unreachable("Unknown branch instruction?");
278 case AArch64::Bcc:
279 Target = LastInst->getOperand(1).getMBB();
280 Cond.push_back(LastInst->getOperand(0));
281 break;
282 case AArch64::CBZW:
283 case AArch64::CBZX:
284 case AArch64::CBNZW:
285 case AArch64::CBNZX:
286 Target = LastInst->getOperand(1).getMBB();
287 Cond.push_back(MachineOperand::CreateImm(-1));
288 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
289 Cond.push_back(LastInst->getOperand(0));
290 break;
291 case AArch64::TBZW:
292 case AArch64::TBZX:
293 case AArch64::TBNZW:
294 case AArch64::TBNZX:
295 Target = LastInst->getOperand(2).getMBB();
296 Cond.push_back(MachineOperand::CreateImm(-1));
297 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
298 Cond.push_back(LastInst->getOperand(0));
299 Cond.push_back(LastInst->getOperand(1));
300 break;
301 case AArch64::CBWPri:
302 case AArch64::CBXPri:
303 case AArch64::CBWPrr:
304 case AArch64::CBXPrr:
305 Target = LastInst->getOperand(3).getMBB();
306 Cond.push_back(MachineOperand::CreateImm(-1));
307 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
308 Cond.push_back(LastInst->getOperand(0));
309 Cond.push_back(LastInst->getOperand(1));
310 Cond.push_back(LastInst->getOperand(2));
311 break;
      // For the AssertExt forms the destination block is operand 3, so the two
      // trailing extension operands are taken from positions 4 and 5.
312 case AArch64::CBBAssertExt:
313 case AArch64::CBHAssertExt:
314 Target = LastInst->getOperand(3).getMBB();
315 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
316 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
317 Cond.push_back(LastInst->getOperand(0)); // Cond
318 Cond.push_back(LastInst->getOperand(1)); // Op0
319 Cond.push_back(LastInst->getOperand(2)); // Op1
320 Cond.push_back(LastInst->getOperand(4)); // Ext0
321 Cond.push_back(LastInst->getOperand(5)); // Ext1
322 break;
323 }
324}
325
326static unsigned getBranchDisplacementBits(unsigned Opc) {
327 switch (Opc) {
328 default:
329 llvm_unreachable("unexpected opcode!");
330 case AArch64::B:
331 return BDisplacementBits;
332 case AArch64::TBNZW:
333 case AArch64::TBZW:
334 case AArch64::TBNZX:
335 case AArch64::TBZX:
336 return TBZDisplacementBits;
337 case AArch64::CBNZW:
338 case AArch64::CBZW:
339 case AArch64::CBNZX:
340 case AArch64::CBZX:
341 return CBZDisplacementBits;
342 case AArch64::Bcc:
343 return BCCDisplacementBits;
344 case AArch64::CBWPri:
345 case AArch64::CBXPri:
346 case AArch64::CBBAssertExt:
347 case AArch64::CBHAssertExt:
348 case AArch64::CBWPrr:
349 case AArch64::CBXPrr:
350 return CBDisplacementBits;
351 }
352}
353
// NOTE(review): the first line of this member function's signature (listing
// line 354) is missing from this extract; the body uses a branch opcode named
// BranchOp in addition to BrOffset. The listing's line-number tokens are kept
// as they are so the fragment still splices into the surrounding text.
//
// Returns whether BrOffset, converted from bytes to 4-byte instruction units,
// fits in a signed field of the width configured for BranchOp.
355 int64_t BrOffset) const {
356 unsigned Bits = getBranchDisplacementBits(BranchOp);
    // The two literals below are concatenated by the compiler, so the first
    // one has to end in a space to keep "jump" and "over" apart.
357 assert(Bits >= 3 && "max branch displacement must be enough to jump "
358 "over conditional branch expansion");
359 return isIntN(Bits, BrOffset / 4);
360}
361
// NOTE(review): the signature of this function (listing lines 362-363) is
// missing from this extract. Other code in this file calls it as
// getBranchDestBlock(MI).
//
// Returns the destination basic block of the branch MI. Which operand holds
// the block depends on the opcode family: operand 0 for B, operand 1 for
// CB[N]Z and Bcc, operand 2 for TB[N]Z, operand 3 for the CB pseudos. Any other
// opcode is treated as unreachable.
364 switch (MI.getOpcode()) {
365 default:
366 llvm_unreachable("unexpected opcode!");
367 case AArch64::B:
368 return MI.getOperand(0).getMBB();
369 case AArch64::TBZW:
370 case AArch64::TBNZW:
371 case AArch64::TBZX:
372 case AArch64::TBNZX:
373 return MI.getOperand(2).getMBB();
374 case AArch64::CBZW:
375 case AArch64::CBNZW:
376 case AArch64::CBZX:
377 case AArch64::CBNZX:
378 case AArch64::Bcc:
379 return MI.getOperand(1).getMBB();
380 case AArch64::CBWPri:
381 case AArch64::CBXPri:
382 case AArch64::CBBAssertExt:
383 case AArch64::CBHAssertExt:
384 case AArch64::CBWPrr:
385 case AArch64::CBXPrr:
386 return MI.getOperand(3).getMBB();
387 }
388}
389
// NOTE(review): the first line of this member function's signature (listing
// line 390) is missing from this extract; the body additionally uses a block
// named MBB.
//
// Fills the empty block MBB with a branch to NewDestBB for the case where the
// destination may be out of reach of a plain branch. Three strategies are
// tried in order:
//   1. X16 unused: emit an ordinary unconditional branch.
//   2. Cold-section block without BTI and a free GPR64: ADRP + ADD + BR
//      through the scavenged register.
//   3. Otherwise: spill X16 around a B to RestoreBB, which reloads it.
391 MachineBasicBlock &NewDestBB,
392 MachineBasicBlock &RestoreBB,
393 const DebugLoc &DL,
394 int64_t BrOffset,
395 RegScavenger *RS) const {
396 assert(RS && "RegScavenger required for long branching");
397 assert(MBB.empty() &&
398 "new block should be inserted for expanding unconditional branch");
399 assert(MBB.pred_size() == 1);
400 assert(RestoreBB.empty() &&
401 "restore block should be inserted for restoring clobbered registers");
402
    // Helper: materialise DestBB's address in Reg (page, then page offset) and
    // branch through it.
403 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
404 // Offsets outside of the signed 33-bit range are not supported for ADRP +
405 // ADD.
406 if (!isInt<33>(BrOffset))
      // NOTE(review): listing line 407, which opens the call that takes the
      // message below, is missing from this extract.
408 "Branch offsets outside of the signed 33-bit range not supported");
409
410 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
411 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
412 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
413 .addReg(Reg)
414 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
415 .addImm(0);
416 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
417 };
418
419 RS->enterBasicBlockEnd(MBB);
420 // If X16 is unused, we can rely on the linker to insert a range extension
421 // thunk if NewDestBB is out of range of a single B instruction.
422 constexpr Register Reg = AArch64::X16;
423 if (!RS->isRegUsed(Reg)) {
424 insertUnconditionalBranch(MBB, &NewDestBB, DL);
425 RS->setRegUsed(Reg);
426 return;
427 }
428
429 // In a cold block without BTI, insert the indirect branch if a register is
430 // free. Skip this if BTI is enabled to avoid inserting a BTI at the target,
431 // prioritizing a dynamic cost in cold code over a static cost in hot code.
432 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
433 bool HasBTI = AFI && AFI->branchTargetEnforcement();
434 if (MBB.getSectionID() == MBBSectionID::ColdSectionID && !HasBTI) {
435 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
436 if (Scavenged != AArch64::NoRegister) {
437 buildIndirectBranch(Scavenged, NewDestBB);
438 RS->setRegUsed(Scavenged);
439 return;
440 }
441 }
442
443 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
444 // with red zones.
    // A missing function-info object, or an undetermined red-zone answer, is
    // handled the same way as "has a red zone" (value_or(true)).
445 if (!AFI || AFI->hasRedZone().value_or(true))
      // NOTE(review): listing line 446, which opens the call that takes the
      // message below, is missing from this extract.
447 "Unable to insert indirect branch inside function that has red zone");
448
449 // Otherwise, spill X16 and defer range extension to the linker.
    // Pre-indexed store: SP is moved down by 16 and X16 is saved there.
450 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
451 .addReg(AArch64::SP, RegState::Define)
452 .addReg(Reg)
453 .addReg(AArch64::SP)
454 .addImm(-16);
455
456 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
457
    // Post-indexed load in RestoreBB undoes the spill and moves SP back by 16.
458 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
459 .addReg(AArch64::SP, RegState::Define)
      // NOTE(review): listing line 460 (one operand of this instruction) is
      // missing from this extract.
461 .addReg(AArch64::SP)
462 .addImm(16);
463}
464
465// Branch analysis.
// NOTE(review): parts of this signature (listing lines 466-467 and 469) are
// missing from this extract. Other code in this file calls the function as
// analyzeBranch(MBB, TBB, FBB, Cond, AllowModify).
//
// Inspects the terminators at the end of MBB. Returns false when the
// terminator sequence was understood and true when it was not. On a false
// return:
//   - no terminator:            TBB, FBB and Cond are left untouched
//   - single unconditional B:   TBB is its target
//   - single conditional:       TBB and Cond describe it (see parseCondBranch)
//   - conditional followed by B: TBB and Cond describe the conditional, FBB is
//                                the target of the B
// With AllowModify set, redundant trailing unconditional branches may be
// erased from the block.
468 MachineBasicBlock *&FBB,
470 bool AllowModify) const {
471 // If the block has no terminators, it just falls into the block after it.
472 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
473 if (I == MBB.end())
474 return false;
475
476 // Skip over SpeculationBarrierEndBB terminators
    // NOTE(review): the decrement below is not guarded by I != MBB.begin();
    // presumably a barrier is never the first instruction of a block - confirm.
477 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
478 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
479 --I;
480 }
481
482 if (!isUnpredicatedTerminator(*I))
483 return false;
484
485 // Get the last instruction in the block.
486 MachineInstr *LastInst = &*I;
487
488 // If there is only one terminator instruction, process it.
    // Note that the condition below steps I back by one whenever I is not at
    // the start of the block.
489 unsigned LastOpc = LastInst->getOpcode();
490 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
491 if (isUncondBranchOpcode(LastOpc)) {
492 TBB = LastInst->getOperand(0).getMBB();
493 return false;
494 }
495 if (isCondBranchOpcode(LastOpc)) {
496 // Block ends with fall-through condbranch.
497 parseCondBranch(LastInst, TBB, Cond);
498 return false;
499 }
500 return true; // Can't handle indirect branch.
501 }
502
503 // Get the instruction before it if it is a terminator.
504 MachineInstr *SecondLastInst = &*I;
505 unsigned SecondLastOpc = SecondLastInst->getOpcode();
506
507 // If AllowModify is true and the block ends with two or more unconditional
508 // branches, delete all but the first unconditional branch.
509 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
510 while (isUncondBranchOpcode(SecondLastOpc)) {
511 LastInst->eraseFromParent();
512 LastInst = SecondLastInst;
513 LastOpc = LastInst->getOpcode();
514 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
515 // Return now the only terminator is an unconditional branch.
516 TBB = LastInst->getOperand(0).getMBB();
517 return false;
518 }
519 SecondLastInst = &*I;
520 SecondLastOpc = SecondLastInst->getOpcode();
521 }
522 }
523
524 // If we're allowed to modify and the block ends in a unconditional branch
525 // which could simply fallthrough, remove the branch. (Note: This case only
526 // matters when we can't understand the whole sequence, otherwise it's also
527 // handled by BranchFolding.cpp.)
528 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
529 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
530 LastInst->eraseFromParent();
531 LastInst = SecondLastInst;
532 LastOpc = LastInst->getOpcode();
533 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
534 assert(!isUncondBranchOpcode(LastOpc) &&
535 "unreachable unconditional branches removed above");
536
537 if (isCondBranchOpcode(LastOpc)) {
538 // Block ends with fall-through condbranch.
539 parseCondBranch(LastInst, TBB, Cond);
540 return false;
541 }
542 return true; // Can't handle indirect branch.
543 }
544 SecondLastInst = &*I;
545 SecondLastOpc = SecondLastInst->getOpcode();
546 }
547
548 // If there are three terminators, we don't know what sort of block this is.
549 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
550 return true;
551
552 // If the block ends with a B and a Bcc, handle it.
553 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
554 parseCondBranch(SecondLastInst, TBB, Cond);
555 FBB = LastInst->getOperand(0).getMBB();
556 return false;
557 }
558
559 // If the block ends with two unconditional branches, handle it. The second
560 // one is not executed, so remove it.
561 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
562 TBB = SecondLastInst->getOperand(0).getMBB();
563 I = LastInst;
564 if (AllowModify)
565 I->eraseFromParent();
566 return false;
567 }
568
569 // ...likewise if it ends with an indirect branch followed by an unconditional
570 // branch.
    // The dead trailing branch may be erased, but the block still counts as
    // not analyzable.
571 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
572 I = LastInst;
573 if (AllowModify)
574 I->eraseFromParent();
575 return true;
576 }
577
578 // Otherwise, can't handle this.
579 return true;
580}
581
// NOTE(review): the first line of this member function's signature (listing
// line 582) is missing from this extract, so its name is not visible here; the
// body additionally uses a block named MBB.
//
// Fills MBP with a description of the conditional branch that ends MBB.
// Returns true when no such description can be produced (analyzeBranch fails,
// the block ends in an unconditional branch only, or the conditional branch is
// not one of the opcodes handled below) and false on success.
583 MachineBranchPredicate &MBP,
584 bool AllowModify) const {
585 // Use analyzeBranch to validate the branch pattern.
586 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    // NOTE(review): listing line 587 (presumably the declaration of `Cond`) is
    // missing from this extract.
588 if (analyzeBranch(MBB, TBB, FBB, Cond, AllowModify))
589 return true;
590
591 // analyzeBranch returns success with empty Cond for unconditional branches.
592 if (Cond.empty())
593 return true;
594
595 MBP.TrueDest = TBB;
596 assert(MBP.TrueDest && "expected!");
    // Without an explicit false target the branch falls through to the block
    // that follows MBB.
597 MBP.FalseDest = FBB ? FBB : MBB.getNextNode();
598
599 MBP.ConditionDef = nullptr;
600 MBP.SingleUseCondition = false;
601
602 // Find the conditional branch. After analyzeBranch succeeds with non-empty
603 // Cond, there's exactly one conditional branch - either last (fallthrough)
604 // or second-to-last (followed by unconditional B).
605 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
606 if (I == MBB.end())
607 return true;
608
609 if (isUncondBranchOpcode(I->getOpcode())) {
610 if (I == MBB.begin())
611 return true;
612 --I;
613 }
614
615 MachineInstr *CondBranch = &*I;
616 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
617
618 switch (CondBranch->getOpcode()) {
619 default:
620 return true;
621
622 case AArch64::Bcc:
623 // Bcc takes the NZCV flag as the operand to branch on, walk up the
624 // instruction stream to find the last instruction to define NZCV.
      // NOTE(review): listing line 625 (the loop header that introduces `MI`)
      // is missing from this extract. Only ConditionDef is filled in on this
      // path; LHS, RHS and Predicate are not assigned.
626 if (MI.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
627 MBP.ConditionDef = &MI;
628 break;
629 }
630 }
631 return false;
632
      // CB[N]Z compares a register against zero: LHS is the register, RHS the
      // immediate 0, and the predicate is NE for CBNZ and EQ for CBZ.
633 case AArch64::CBZW:
634 case AArch64::CBZX:
635 case AArch64::CBNZW:
636 case AArch64::CBNZX: {
637 MBP.LHS = CondBranch->getOperand(0);
638 MBP.RHS = MachineOperand::CreateImm(0);
639 unsigned Opc = CondBranch->getOpcode();
640 MBP.Predicate = (Opc == AArch64::CBNZX || Opc == AArch64::CBNZW)
641 ? MachineBranchPredicate::PRED_NE
642 : MachineBranchPredicate::PRED_EQ;
643 Register CondReg = MBP.LHS.getReg();
644 if (CondReg.isVirtual())
645 MBP.ConditionDef = MRI.getVRegDef(CondReg);
646 return false;
647 }
648
      // TB[N]Z: only ConditionDef is filled in; LHS, RHS and Predicate are not
      // assigned on this path.
649 case AArch64::TBZW:
650 case AArch64::TBZX:
651 case AArch64::TBNZW:
652 case AArch64::TBNZX: {
653 Register CondReg = CondBranch->getOperand(0).getReg();
654 if (CondReg.isVirtual())
655 MBP.ConditionDef = MRI.getVRegDef(CondReg);
656 return false;
657 }
658 }
659}
660
// NOTE(review): the signature of this function (listing lines 661-662) is
// missing from this extract, so its name is not visible here.
//
// Inverts, in place, the branch condition held in Cond (layout as produced by
// parseCondBranch) and then returns false. For folded compare-and-branch
// conditions the opcode stored in Cond[1] is swapped with its opposite
// (CBZ <-> CBNZ, TBZ <-> TBNZ); for the CB pseudos the condition code stored in
// Cond[2] is read instead.
663 if (Cond[0].getImm() != -1) {
664 // Regular Bcc
665 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    // NOTE(review): listing line 666 is missing from this extract; presumably
    // it stores the inverted condition code back into Cond[0].
667 } else {
668 // Folded compare-and-branch
669 switch (Cond[1].getImm()) {
670 default:
671 llvm_unreachable("Unknown conditional branch!");
672 case AArch64::CBZW:
673 Cond[1].setImm(AArch64::CBNZW);
674 break;
675 case AArch64::CBNZW:
676 Cond[1].setImm(AArch64::CBZW);
677 break;
678 case AArch64::CBZX:
679 Cond[1].setImm(AArch64::CBNZX);
680 break;
681 case AArch64::CBNZX:
682 Cond[1].setImm(AArch64::CBZX);
683 break;
684 case AArch64::TBZW:
685 Cond[1].setImm(AArch64::TBNZW);
686 break;
687 case AArch64::TBNZW:
688 Cond[1].setImm(AArch64::TBZW);
689 break;
690 case AArch64::TBZX:
691 Cond[1].setImm(AArch64::TBNZX);
692 break;
693 case AArch64::TBNZX:
694 Cond[1].setImm(AArch64::TBZX);
695 break;
696
697 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
698 case AArch64::CBWPri:
699 case AArch64::CBXPri:
700 case AArch64::CBBAssertExt:
701 case AArch64::CBHAssertExt:
702 case AArch64::CBWPrr:
703 case AArch64::CBXPrr: {
704 // Pseudos using standard 4bit Arm condition codes
      // NOTE(review): listing lines 705 and 707 are missing from this extract;
      // presumably they declare the local that receives the cast below and
      // write the inverted code back into Cond[2].
706 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
708 }
709 }
710 }
711
712 return false;
713}
714
// NOTE(review): the first line of this member function's signature (listing
// line 715) is missing from this extract, so its name is not visible here; the
// body additionally uses a block named MBB.
//
// Erases up to two branch instructions from the end of MBB and returns how
// many were erased (0, 1 or 2). When BytesRemoved is non-null and at least one
// branch was erased, it receives 4 bytes per erased branch; it is left
// untouched when nothing is erased.
716 int *BytesRemoved) const {
717 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
718 if (I == MBB.end())
719 return 0;
720
721 if (!isUncondBranchOpcode(I->getOpcode()) &&
722 !isCondBranchOpcode(I->getOpcode()))
723 return 0;
724
725 // Remove the branch.
726 I->eraseFromParent();
727
    // Re-read the end of the block; the previous iterator was invalidated by
    // the erase.
728 I = MBB.end();
729
730 if (I == MBB.begin()) {
731 if (BytesRemoved)
732 *BytesRemoved = 4;
733 return 1;
734 }
735 --I;
    // Only a conditional branch is removed as the second instruction.
736 if (!isCondBranchOpcode(I->getOpcode())) {
737 if (BytesRemoved)
738 *BytesRemoved = 4;
739 return 1;
740 }
741
742 // Remove the branch.
743 I->eraseFromParent();
744 if (BytesRemoved)
745 *BytesRemoved = 8;
746
747 return 2;
748}
749
// Appends to MBB the conditional branch described by Cond, targeting TBB.
// Cond uses the layout produced by parseCondBranch: a single condition code
// for Bcc, or { -1, Opcode, operands... } for folded compare-and-branch forms.
// NOTE(review): the parameter list (listing lines 751-752) is missing from
// this extract; the body uses MBB, DL, TBB and Cond.
750void AArch64InstrInfo::instantiateCondBranch(
753 if (Cond[0].getImm() != -1) {
754 // Regular Bcc
755 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
756 } else {
757 // Folded compare-and-branch
758 // Note that we use add() instead of addReg() to keep the operand flags.
759
760 // cbz, cbnz
761 const MachineInstrBuilder MIB =
762 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
763
764 // tbz/tbnz
765 if (Cond.size() > 3)
766 MIB.add(Cond[3]);
767
768 // cb
769 if (Cond.size() > 4)
770 MIB.add(Cond[4]);
771
772 MIB.addMBB(TBB);
773
774 // cb[b,h]
    // The two extension immediates go after the target block. Cond[6] is read
    // whenever more than five entries are present, which matches the
    // seven-entry layout parseCondBranch builds for these opcodes.
775 if (Cond.size() > 5) {
776 MIB.addImm(Cond[5].getImm());
777 MIB.addImm(Cond[6].getImm());
778 }
779 }
780}
781
// NOTE(review): the first lines of this member function's signature (listing
// lines 782-783) are missing from this extract; the body additionally uses
// MBB, TBB and FBB. The name insertBranch is taken from the assertion message
// below.
//
// Appends branch instructions to the end of MBB and returns how many were
// added:
//   - FBB null, Cond empty:     one unconditional B to TBB
//   - FBB null, Cond non-empty: one conditional branch to TBB
//   - FBB non-null:             conditional branch to TBB followed by B to FBB
// When BytesAdded is non-null it receives 4 bytes per inserted branch.
784 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
785 // Shouldn't be a fall through.
786 assert(TBB && "insertBranch must not be told to insert a fallthrough");
787
788 if (!FBB) {
789 if (Cond.empty()) // Unconditional branch?
790 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
791 else
792 instantiateCondBranch(MBB, DL, TBB, Cond);
793
794 if (BytesAdded)
795 *BytesAdded = 4;
796
797 return 1;
798 }
799
800 // Two-way conditional branch.
801 instantiateCondBranch(MBB, DL, TBB, Cond);
802 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
803
804 if (BytesAdded)
805 *BytesAdded = 8;
806
807 return 2;
808}
809
// NOTE(review): the first line of this function's signature (listing line 810)
// is missing from this extract, so its name is not visible here; the body
// additionally uses a block pointer named MBB.
//
// Scans the terminators of MBB for a compare/test-and-branch whose register
// operand is the zero register (WZR or XZR) and simplifies the first one found:
//   - CBZ/TBZ on the zero register is always taken: every successor except the
//     branch target is removed, the branch and everything after it are erased,
//     and an unconditional B to the target is appended.
//   - CBNZ/TBNZ on the zero register is never taken: the target is removed
//     from the successor list and the branch is erased.
// Returns true if the block was changed, false otherwise.
811 const TargetInstrInfo &TII) {
812 for (MachineInstr &MI : MBB->terminators()) {
813 unsigned Opc = MI.getOpcode();
814 switch (Opc) {
815 case AArch64::CBZW:
816 case AArch64::CBZX:
817 case AArch64::TBZW:
818 case AArch64::TBZX:
819 // CBZ/TBZ with WZR/XZR -> unconditional B
820 if (MI.getOperand(0).getReg() == AArch64::WZR ||
821 MI.getOperand(0).getReg() == AArch64::XZR) {
822 DEBUG_WITH_TYPE("optimizeTerminators",
823 dbgs() << "Removing always taken branch: " << MI);
824 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
        // Iterate over a copy of the successor list because removeSuccessor
        // mutates the list being walked.
825 SmallVector<MachineBasicBlock *> Succs(MBB->successors());
826 for (auto *S : Succs)
827 if (S != Target)
828 MBB->removeSuccessor(S);
        // Save the debug location before MI is erased, then drop everything
        // that follows MI in the block.
829 DebugLoc DL = MI.getDebugLoc();
830 while (MBB->rbegin() != &MI)
831 MBB->rbegin()->eraseFromParent();
832 MI.eraseFromParent();
833 BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
834 return true;
835 }
836 break;
837 case AArch64::CBNZW:
838 case AArch64::CBNZX:
839 case AArch64::TBNZW:
840 case AArch64::TBNZX:
841 // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
842 if (MI.getOperand(0).getReg() == AArch64::WZR ||
843 MI.getOperand(0).getReg() == AArch64::XZR) {
844 DEBUG_WITH_TYPE("optimizeTerminators",
845 dbgs() << "Removing never taken branch: " << MI);
846 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
847 MI.getParent()->removeSuccessor(Target);
848 MI.eraseFromParent();
849 return true;
850 }
851 break;
852 }
853 }
854 return false;
855}
856
857// Find the original register that VReg is copied from.
858static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
859 while (Register::isVirtualRegister(VReg)) {
860 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
861 if (!DefMI->isFullCopy())
862 return VReg;
863 VReg = DefMI->getOperand(1).getReg();
864 }
865 return VReg;
866}
867
868// Determine if VReg is defined by an instruction that can be folded into a
869// csel instruction. If so, return the folded opcode, and the replacement
870// register.
// Returns 0 when no fold is possible. On success the return value is one of
// the CSINC/CSINV/CSNEG opcodes (W or X form, chosen from VReg's register
// class) and, if NewReg is non-null, *NewReg receives the register to feed to
// that instruction. Patterns recognised: constant 1, add x, 1, bitwise not
// (orn with the zero register) and negate (sub from the zero register).
871static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
872 unsigned *NewReg = nullptr) {
873 VReg = removeCopies(MRI, VReg);
    // NOTE(review): listing line 874, the condition guarding this early
    // return, is missing from this extract.
875 return 0;
876
877 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
878 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
879 unsigned Opc = 0;
880 unsigned SrcReg = 0;
881 switch (DefMI->getOpcode()) {
882 case AArch64::SUBREG_TO_REG:
883 // Check for the following way to define an 64-bit immediate:
884 // %0:gpr32 = MOVi32imm 1
885 // %1:gpr64 = SUBREG_TO_REG %0:gpr32, %subreg.sub_32
886 if (!DefMI->getOperand(1).isReg())
887 return 0;
888 if (!DefMI->getOperand(2).isImm() ||
889 DefMI->getOperand(2).getImm() != AArch64::sub_32)
890 return 0;
      // From here on DefMI refers to the instruction defining the 32-bit
      // source register.
891 DefMI = MRI.getVRegDef(DefMI->getOperand(1).getReg());
892 if (DefMI->getOpcode() != AArch64::MOVi32imm)
893 return 0;
894 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
895 return 0;
896 assert(Is64Bit);
897 SrcReg = AArch64::XZR;
898 Opc = AArch64::CSINCXr;
899 break;
900
      // Constant 1 becomes csinc of the zero register.
901 case AArch64::MOVi32imm:
902 case AArch64::MOVi64imm:
903 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
904 return 0;
905 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
906 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
907 break;
908
909 case AArch64::ADDSXri:
910 case AArch64::ADDSWri:
911 // if NZCV is used, do not fold.
      // NOTE(review): presumably the trailing `true` restricts the search to a
      // dead NZCV def, so -1 means the flags may still be live - confirm
      // against the findRegisterDefOperandIdx declaration.
912 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
913 true) == -1)
914 return 0;
915 // fall-through to ADDXri and ADDWri.
916 [[fallthrough]];
917 case AArch64::ADDXri:
918 case AArch64::ADDWri:
919 // add x, 1 -> csinc.
      // Operand 2 must be the immediate 1 and operand 3 must be 0.
920 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
921 DefMI->getOperand(3).getImm() != 0)
922 return 0;
923 SrcReg = DefMI->getOperand(1).getReg();
924 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
925 break;
926
927 case AArch64::ORNXrr:
928 case AArch64::ORNWrr: {
929 // not x -> csinv, represented as orn dst, xzr, src.
930 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
931 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
932 return 0;
933 SrcReg = DefMI->getOperand(2).getReg();
934 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
935 break;
936 }
937
938 case AArch64::SUBSXrr:
939 case AArch64::SUBSWrr:
940 // if NZCV is used, do not fold.
941 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
942 true) == -1)
943 return 0;
944 // fall-through to SUBXrr and SUBWrr.
945 [[fallthrough]];
946 case AArch64::SUBXrr:
947 case AArch64::SUBWrr: {
948 // neg x -> csneg, represented as sub dst, xzr, src.
949 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
950 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
951 return 0;
952 SrcReg = DefMI->getOperand(2).getReg();
953 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
954 break;
955 }
956 default:
957 return 0;
958 }
959 assert(Opc && SrcReg && "Missing parameters");
960
961 if (NewReg)
962 *NewReg = SrcReg;
963 return Opc;
964}
965
// NOTE(review): the first lines of this member function's signature (listing
// lines 966-967) are missing from this extract, so its name is not visible
// here; the body additionally uses MBB and Cond.
//
// Decides whether a select of TrueReg/FalseReg into DstReg can be emitted and,
// if so, reports latency estimates through CondCycles, TrueCycles and
// FalseCycles. Returns true for 32/64-bit GPR classes and for FPR32/FPR64
// classes, false otherwise; the cycle outputs are only written on a true
// return.
968 Register DstReg, Register TrueReg,
969 Register FalseReg, int &CondCycles,
970 int &TrueCycles,
971 int &FalseCycles) const {
972 // Check register classes.
973 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
974 const TargetRegisterClass *RC =
975 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
976 if (!RC)
977 return false;
978
979 // Also need to check the dest regclass, in case we're trying to optimize
980 // something like:
981 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
982 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
983 return false;
984
985 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
    // A Cond of size 1 is a plain Bcc; any other size is a folded
    // compare-and-branch, which costs one extra cycle.
986 unsigned ExtraCondLat = Cond.size() != 1;
987
988 // GPRs are handled by csel.
989 // FIXME: Fold in x+1, -x, and ~x when applicable.
990 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
991 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
992 // Single-cycle csel, csinc, csinv, and csneg.
993 CondCycles = 1 + ExtraCondLat;
994 TrueCycles = FalseCycles = 1;
      // At most one side is credited with a zero-cycle fold; the true side is
      // checked first.
995 if (canFoldIntoCSel(MRI, TrueReg))
996 TrueCycles = 0;
997 else if (canFoldIntoCSel(MRI, FalseReg))
998 FalseCycles = 0;
999 return true;
1000 }
1001
1002 // Scalar floating point is handled by fcsel.
1003 // FIXME: Form fabs, fmin, and fmax when applicable.
1004 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
1005 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
1006 CondCycles = 5 + ExtraCondLat;
1007 TrueCycles = FalseCycles = 2;
1008 return true;
1009 }
1010
1011 // Can't do vectors.
1012 return false;
1013}
1014
// Interior of the select-emission routine.
// NOTE(review): the opening of the signature (function name and leading
// parameters, including the insertion point `I`) and the parameter line that
// presumably declares `Cond` are not present in this listing - confirm them
// against the upstream file before editing.
//
// Visible behavior: decode Cond by its size, emit a flag-setting instruction
// in front of I when Cond describes a compare-and-branch rather than a plain
// b.cc, then emit a conditional select of TrueReg/FalseReg into DstReg.
1017 const DebugLoc &DL, Register DstReg,
1019 Register TrueReg, Register FalseReg) const {
1020 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1021
1022 // Parse the condition code, see parseCondBranch() above.
// NOTE(review): `CC` is assigned in every case below, but its declaration is
// not visible in this listing.
1024 switch (Cond.size()) {
1025 default:
1026 llvm_unreachable("Unknown condition opcode in Cond");
1027 case 1: // b.cc
// The flags are already set; only the condition code has to be read.
1028 CC = AArch64CC::CondCode(Cond[0].getImm());
1029 break;
1030 case 3: { // cbz/cbnz
1031 // We must insert a compare against 0.
1032 bool Is64Bit;
1033 switch (Cond[1].getImm()) {
1034 default:
1035 llvm_unreachable("Unknown branch opcode in Cond");
1036 case AArch64::CBZW:
1037 Is64Bit = false;
1038 CC = AArch64CC::EQ;
1039 break;
1040 case AArch64::CBZX:
1041 Is64Bit = true;
1042 CC = AArch64CC::EQ;
1043 break;
1044 case AArch64::CBNZW:
1045 Is64Bit = false;
1046 CC = AArch64CC::NE;
1047 break;
1048 case AArch64::CBNZX:
1049 Is64Bit = true;
1050 CC = AArch64CC::NE;
1051 break;
1052 }
1053 Register SrcReg = Cond[2].getReg();
1054 if (Is64Bit) {
1055 // cmp reg, #0 is actually subs xzr, reg, #0.
1056 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
1057 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
1058 .addReg(SrcReg)
1059 .addImm(0)
1060 .addImm(0);
1061 } else {
// 32-bit variant of the same compare-against-zero.
1062 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
1063 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
1064 .addReg(SrcReg)
1065 .addImm(0)
1066 .addImm(0);
1067 }
1068 break;
1069 }
1070 case 4: { // tbz/tbnz
1071 // We must insert a tst instruction.
1072 switch (Cond[1].getImm()) {
1073 default:
1074 llvm_unreachable("Unknown branch opcode in Cond");
1075 case AArch64::TBZW:
1076 case AArch64::TBZX:
1077 CC = AArch64CC::EQ;
1078 break;
1079 case AArch64::TBNZW:
1080 case AArch64::TBNZX:
1081 CC = AArch64CC::NE;
1082 break;
1083 }
1084 // tst reg, #(1<<foo) is actually ands xzr, reg, #(1<<foo).
1085 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
1086 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
1087 .addReg(Cond[2].getReg())
1088 .addImm(
// NOTE(review): the immediate argument line is missing from this listing.
1090 else
1091 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
1092 .addReg(Cond[2].getReg())
1093 .addImm(
// NOTE(review): the immediate argument line is missing from this listing.
1095 break;
1096 }
1097 case 5: { // cb
1098 // We must insert a cmp, that is a subs
1099 // 0 1 2 3 4
1100 // Cond is { -1, Opcode, CC, Op0, Op1 }
1101
1102 unsigned SubsOpc, SubsDestReg;
1103 bool IsImm = false;
1104 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
// Pick the SUBS flavour (width, immediate vs. register second operand) that
// matches the compare-and-branch opcode.
1105 switch (Cond[1].getImm()) {
1106 default:
1107 llvm_unreachable("Unknown branch opcode in Cond");
1108 case AArch64::CBWPri:
1109 SubsOpc = AArch64::SUBSWri;
1110 SubsDestReg = AArch64::WZR;
1111 IsImm = true;
1112 break;
1113 case AArch64::CBXPri:
1114 SubsOpc = AArch64::SUBSXri;
1115 SubsDestReg = AArch64::XZR;
1116 IsImm = true;
1117 break;
1118 case AArch64::CBWPrr:
1119 SubsOpc = AArch64::SUBSWrr;
1120 SubsDestReg = AArch64::WZR;
1121 IsImm = false;
1122 break;
1123 case AArch64::CBXPrr:
1124 SubsOpc = AArch64::SUBSXrr;
1125 SubsDestReg = AArch64::XZR;
1126 IsImm = false;
1127 break;
1128 }
1129
1130 if (IsImm)
1131 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1132 .addReg(Cond[3].getReg())
1133 .addImm(Cond[4].getImm())
1134 .addImm(0);
1135 else
1136 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1137 .addReg(Cond[3].getReg())
1138 .addReg(Cond[4].getReg());
1139 } break;
1140 case 7: { // cb[b,h]
1141 // We must insert a cmp, that is a subs, but also zero- or sign-extensions
1142 // that have been folded. For the first operand we codegen an explicit
1143 // extension, for the second operand we fold the extension into cmp.
1144 // 0 1 2 3 4 5 6
1145 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
// NOTE(review): the layout above labels Cond[4] as Op1, yet the explicit
// extension below is built from Cond[4] and the subs takes Cond[3] as its
// first source; confirm which operand "first" refers to.
1146
1147 // We need a new register for the now explicitly extended register
1148 Register Reg = Cond[4].getReg();
// NOTE(review): the line opening the conditional scope closed at 1194, and
// the initializer of ExtendType, are missing from this listing.
1150 unsigned ExtOpc;
1151 unsigned ExtBits;
1152 AArch64_AM::ShiftExtendType ExtendType =
1154 switch (ExtendType) {
1155 default:
1156 llvm_unreachable("Unknown shift-extend for CB instruction");
1157 case AArch64_AM::SXTB:
1158 assert(
1159 Cond[1].getImm() == AArch64::CBBAssertExt &&
1160 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1161 ExtOpc = AArch64::SBFMWri;
1162 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1163 break;
1164 case AArch64_AM::SXTH:
1165 assert(
1166 Cond[1].getImm() == AArch64::CBHAssertExt &&
1167 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1168 ExtOpc = AArch64::SBFMWri;
1169 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1170 break;
1171 case AArch64_AM::UXTB:
1172 assert(
1173 Cond[1].getImm() == AArch64::CBBAssertExt &&
1174 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1175 ExtOpc = AArch64::ANDWri;
1176 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1177 break;
1178 case AArch64_AM::UXTH:
1179 assert(
1180 Cond[1].getImm() == AArch64::CBHAssertExt &&
1181 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1182 ExtOpc = AArch64::ANDWri;
1183 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1184 break;
1185 }
1186
1187 // Build the explicit extension of the first operand
1188 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
// NOTE(review): the declaration of `MBBI` (the builder returned by the
// BuildMI call below) is missing from this listing.
1190 BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
// Every opcode except ANDWri gets an extra leading 0 immediate before ExtBits.
1191 if (ExtOpc != AArch64::ANDWri)
1192 MBBI.addImm(0);
1193 MBBI.addImm(ExtBits);
1194 }
1195
1196 // Now, subs with an extended second operand
// NOTE(review): the `if` that opens this branch and the initializer of
// ExtendType are missing from this listing.
1198 AArch64_AM::ShiftExtendType ExtendType =
1200 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1201 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1202 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1203 .addReg(Cond[3].getReg())
1204 .addReg(Reg)
1205 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1206 } // If no extension is needed, just a regular subs
1207 else {
1208 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1209 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1210 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1211 .addReg(Cond[3].getReg())
1212 .addReg(Reg);
1213 }
1214
1215 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1216 } break;
1217 }
1218
// Choose the select opcode from the first register class DstReg can be
// constrained to; only the two GPR classes enable operand folding.
1219 unsigned Opc = 0;
1220 const TargetRegisterClass *RC = nullptr;
1221 bool TryFold = false;
1222 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1223 RC = &AArch64::GPR64RegClass;
1224 Opc = AArch64::CSELXr;
1225 TryFold = true;
1226 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1227 RC = &AArch64::GPR32RegClass;
1228 Opc = AArch64::CSELWr;
1229 TryFold = true;
1230 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1231 RC = &AArch64::FPR64RegClass;
1232 Opc = AArch64::FCSELDrrr;
1233 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1234 RC = &AArch64::FPR32RegClass;
1235 Opc = AArch64::FCSELSrrr;
1236 }
1237 assert(RC && "Unsupported regclass");
1238
1239 // Try folding simple instructions into the csel.
1240 if (TryFold) {
1241 unsigned NewReg = 0;
1242 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1243 if (FoldedOpc) {
1244 // The folded opcodes csinc, csinv and csneg apply the operation to
1245 // FalseReg, so we need to invert the condition.
// NOTE(review): the statement that inverts CC is missing from this listing;
// only the operand swap below is visible.
1247 TrueReg = FalseReg;
1248 } else
1249 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1250
1251 // Fold the operation. Leave any dead instructions for DCE to clean up.
1252 if (FoldedOpc) {
1253 FalseReg = NewReg;
1254 Opc = FoldedOpc;
1255 // Extend the live range of NewReg.
1256 MRI.clearKillFlags(NewReg);
1257 }
1258 }
1259
1260 // Pull all virtual register into the appropriate class.
1261 MRI.constrainRegClass(TrueReg, RC);
1262 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1263 assert(
1264 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1265 FalseReg == AArch64::XZR) &&
1266 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1267 if (FalseReg.isVirtual())
1268 MRI.constrainRegClass(FalseReg, RC);
1269
1270 // Insert the csel.
1271 BuildMI(MBB, I, DL, get(Opc), DstReg)
1272 .addReg(TrueReg)
1273 .addReg(FalseReg)
1274 .addImm(CC);
1275}
1276
1277 // Return true if Imm can be loaded into a register by a "cheap" sequence of
1278 // instructions. For now, "cheap" means at most two instructions.
// MI supplies the immediate as operand 1; BitSize must be 32 or 64.
1279 static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
// Every 32-bit immediate is reported as cheap without expanding it.
1280 if (BitSize == 32)
1281 return true;
1282
1283 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1284 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
// NOTE(review): the declaration of `Is` (the container expandMOVImm fills) is
// missing from this listing.
1286 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1287
// Cheap when the expansion produced at most two entries.
1288 return Is.size() <= 2;
1289}
1290
1291// Check if a COPY instruction is cheap.
1292static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1293 assert(MI.isCopy() && "Expected COPY instruction");
1294 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1295
1296 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1297 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1298 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1299 if (Reg.isVirtual())
1300 return MRI.getRegClass(Reg);
1301 if (Reg.isPhysical())
1302 return RI.getMinimalPhysRegClass(Reg);
1303 return nullptr;
1304 };
1305 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1306 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1307 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1308 return false;
1309
1310 return MI.isAsCheapAsAMove();
1311}
1312
1313 // FIXME: this implementation should be micro-architecture dependent, so a
1314 // micro-architecture target hook should be introduced here in future.
// NOTE(review): the signature line of this function is missing from this
// listing; the body reads `MI`, `Subtarget` and `RI`.
//
// Returns whether MI is considered as cheap as a register move.
// Subtargets with Exynos handling use their own predicate first and otherwise
// fall back to the generic MachineInstr answer.
1316 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1317 if (isExynosCheapAsMove(MI))
1318 return true;
1319 return MI.isAsCheapAsAMove();
1320 }
1321
1322 switch (MI.getOpcode()) {
1323 default:
1324 return MI.isAsCheapAsAMove();
1325
1326 case TargetOpcode::COPY:
1327 return isCheapCopy(MI, RI);
1328
// Shifted-register add/sub: cheap only when the subtarget reports fast ALU
// LSL and the raw operand-3 immediate is at most 4.
1329 case AArch64::ADDWrs:
1330 case AArch64::ADDXrs:
1331 case AArch64::SUBWrs:
1332 case AArch64::SUBXrs:
1333 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1334
1335 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1336 // ORRXri, it is as cheap as MOV.
1337 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1338 case AArch64::MOVi32imm:
1339 return isCheapImmediate(MI, 32);
1340 case AArch64::MOVi64imm:
1341 return isCheapImmediate(MI, 64);
1342 }
1343}
1344
1345bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1346 switch (MI.getOpcode()) {
1347 default:
1348 return false;
1349
1350 case AArch64::ADDWrs:
1351 case AArch64::ADDXrs:
1352 case AArch64::ADDSWrs:
1353 case AArch64::ADDSXrs: {
1354 unsigned Imm = MI.getOperand(3).getImm();
1355 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1356 if (ShiftVal == 0)
1357 return true;
1358 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1359 }
1360
1361 case AArch64::ADDWrx:
1362 case AArch64::ADDXrx:
1363 case AArch64::ADDXrx64:
1364 case AArch64::ADDSWrx:
1365 case AArch64::ADDSXrx:
1366 case AArch64::ADDSXrx64: {
1367 unsigned Imm = MI.getOperand(3).getImm();
1368 switch (AArch64_AM::getArithExtendType(Imm)) {
1369 default:
1370 return false;
1371 case AArch64_AM::UXTB:
1372 case AArch64_AM::UXTH:
1373 case AArch64_AM::UXTW:
1374 case AArch64_AM::UXTX:
1375 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1376 }
1377 }
1378
1379 case AArch64::SUBWrs:
1380 case AArch64::SUBSWrs: {
1381 unsigned Imm = MI.getOperand(3).getImm();
1382 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1383 return ShiftVal == 0 ||
1384 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1385 }
1386
1387 case AArch64::SUBXrs:
1388 case AArch64::SUBSXrs: {
1389 unsigned Imm = MI.getOperand(3).getImm();
1390 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1391 return ShiftVal == 0 ||
1392 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1393 }
1394
1395 case AArch64::SUBWrx:
1396 case AArch64::SUBXrx:
1397 case AArch64::SUBXrx64:
1398 case AArch64::SUBSWrx:
1399 case AArch64::SUBSXrx:
1400 case AArch64::SUBSXrx64: {
1401 unsigned Imm = MI.getOperand(3).getImm();
1402 switch (AArch64_AM::getArithExtendType(Imm)) {
1403 default:
1404 return false;
1405 case AArch64_AM::UXTB:
1406 case AArch64_AM::UXTH:
1407 case AArch64_AM::UXTW:
1408 case AArch64_AM::UXTX:
1409 return AArch64_AM::getArithShiftValue(Imm) == 0;
1410 }
1411 }
1412
1413 case AArch64::LDRBBroW:
1414 case AArch64::LDRBBroX:
1415 case AArch64::LDRBroW:
1416 case AArch64::LDRBroX:
1417 case AArch64::LDRDroW:
1418 case AArch64::LDRDroX:
1419 case AArch64::LDRHHroW:
1420 case AArch64::LDRHHroX:
1421 case AArch64::LDRHroW:
1422 case AArch64::LDRHroX:
1423 case AArch64::LDRQroW:
1424 case AArch64::LDRQroX:
1425 case AArch64::LDRSBWroW:
1426 case AArch64::LDRSBWroX:
1427 case AArch64::LDRSBXroW:
1428 case AArch64::LDRSBXroX:
1429 case AArch64::LDRSHWroW:
1430 case AArch64::LDRSHWroX:
1431 case AArch64::LDRSHXroW:
1432 case AArch64::LDRSHXroX:
1433 case AArch64::LDRSWroW:
1434 case AArch64::LDRSWroX:
1435 case AArch64::LDRSroW:
1436 case AArch64::LDRSroX:
1437 case AArch64::LDRWroW:
1438 case AArch64::LDRWroX:
1439 case AArch64::LDRXroW:
1440 case AArch64::LDRXroX:
1441 case AArch64::PRFMroW:
1442 case AArch64::PRFMroX:
1443 case AArch64::STRBBroW:
1444 case AArch64::STRBBroX:
1445 case AArch64::STRBroW:
1446 case AArch64::STRBroX:
1447 case AArch64::STRDroW:
1448 case AArch64::STRDroX:
1449 case AArch64::STRHHroW:
1450 case AArch64::STRHHroX:
1451 case AArch64::STRHroW:
1452 case AArch64::STRHroX:
1453 case AArch64::STRQroW:
1454 case AArch64::STRQroX:
1455 case AArch64::STRSroW:
1456 case AArch64::STRSroX:
1457 case AArch64::STRWroW:
1458 case AArch64::STRWroX:
1459 case AArch64::STRXroW:
1460 case AArch64::STRXroX: {
1461 unsigned IsSigned = MI.getOperand(3).getImm();
1462 return !IsSigned;
1463 }
1464 }
1465}
1466
1467bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1468 unsigned Opc = MI.getOpcode();
1469 switch (Opc) {
1470 default:
1471 return false;
1472 case AArch64::SEH_StackAlloc:
1473 case AArch64::SEH_SaveFPLR:
1474 case AArch64::SEH_SaveFPLR_X:
1475 case AArch64::SEH_SaveReg:
1476 case AArch64::SEH_SaveReg_X:
1477 case AArch64::SEH_SaveRegP:
1478 case AArch64::SEH_SaveRegP_X:
1479 case AArch64::SEH_SaveFReg:
1480 case AArch64::SEH_SaveFReg_X:
1481 case AArch64::SEH_SaveFRegP:
1482 case AArch64::SEH_SaveFRegP_X:
1483 case AArch64::SEH_SetFP:
1484 case AArch64::SEH_AddFP:
1485 case AArch64::SEH_Nop:
1486 case AArch64::SEH_PrologEnd:
1487 case AArch64::SEH_EpilogStart:
1488 case AArch64::SEH_EpilogEnd:
1489 case AArch64::SEH_PACSignLR:
1490 case AArch64::SEH_SaveAnyRegI:
1491 case AArch64::SEH_SaveAnyRegIP:
1492 case AArch64::SEH_SaveAnyRegQP:
1493 case AArch64::SEH_SaveAnyRegQPX:
1494 case AArch64::SEH_AllocZ:
1495 case AArch64::SEH_SaveZReg:
1496 case AArch64::SEH_SavePReg:
1497 return true;
1498 }
1499}
1500
// NOTE(review): the first line of the signature (function name and the `MI`
// parameter) is missing from this listing.
//
// Recognizes SBFMXri/UBFMXri instructions that act as a plain 32 -> 64 bit
// extension. On success it writes the source register, the destination
// register and the sub-register index (sub_32) to the out-parameters and
// returns true; otherwise it returns false and leaves them untouched.
1502 Register &SrcReg, Register &DstReg,
1503 unsigned &SubIdx) const {
1504 switch (MI.getOpcode()) {
1505 default:
1506 return false;
1507 case AArch64::SBFMXri: // aka sxtw
1508 case AArch64::UBFMXri: // aka uxtw
1509 // Check for the 32 -> 64 bit extension case, these instructions can do
1510 // much more.
// Only immediates (0, 31) in operands 2 and 3 are accepted.
1511 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1512 return false;
1513 // This is a signed or unsigned 32 -> 64 bit extension.
1514 SrcReg = MI.getOperand(1).getReg();
1515 DstReg = MI.getOperand(0).getReg();
1516 SubIdx = AArch64::sub_32;
1517 return true;
1518 }
1519}
1520
// NOTE(review): the first line of the signature, the declaration of `TRI`,
// and the condition guarding the early `return false` at 1534 are missing
// from this listing.
//
// Returns true only when both instructions decompose into the same base
// operand plus offsets, and the lower access ends at or before the start of
// the higher one. Any case that cannot be proven returns false.
1522 const MachineInstr &MIa, const MachineInstr &MIb) const {
1524 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1525 int64_t OffsetA = 0, OffsetB = 0;
1526 TypeSize WidthA(0, false), WidthB(0, false);
1527 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1528
1529 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1530 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1531
1534 return false;
1535
1536 // Retrieve the base, offset from the base and width. Width
1537 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1538 // base are identical, and the offset of a lower memory access +
1539 // the width doesn't overlap the offset of a higher memory access,
1540 // then the memory accesses are different.
1541 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1542 // are assumed to have the same scale (vscale).
1543 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1544 WidthA, TRI) &&
1545 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1546 WidthB, TRI)) {
1547 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1548 OffsetAIsScalable == OffsetBIsScalable) {
// NOTE(review): OffsetA/OffsetB are int64_t but are narrowed to int here;
// confirm that offsets always fit in an int.
1549 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1550 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1551 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
// The width must be scalable exactly when the offsets are, so both sides of
// the comparison below are in the same units.
1552 if (LowWidth.isScalable() == OffsetAIsScalable &&
1553 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1554 return true;
1555 }
1556 }
1557 return false;
1558}
1559
// NOTE(review): the first line of the signature (function name and the `MI`
// parameter) and the condition guarding the first `return true` at 1564 are
// missing from this listing.
//
// Returns true when MI must act as a scheduling boundary: it has BTI
// semantics, is a CSDB hint, a DSB/ISB, an MSRpstatesvcrImm1, an SEH
// pseudo-instruction, or is immediately followed by a CFI instruction.
1561 const MachineBasicBlock *MBB,
1562 const MachineFunction &MF) const {
1564 return true;
1565
1566 // Do not move an instruction that can be recognized as a branch target.
1567 if (hasBTISemantics(MI))
1568 return true;
1569
1570 switch (MI.getOpcode()) {
1571 case AArch64::HINT:
1572 // CSDB hints are scheduling barriers.
1573 if (MI.getOperand(0).getImm() == 0x14)
1574 return true;
1575 break;
1576 case AArch64::DSB:
1577 case AArch64::ISB:
1578 // DSB and ISB also are scheduling barriers.
1579 return true;
1580 case AArch64::MSRpstatesvcrImm1:
1581 // SMSTART and SMSTOP are also scheduling barriers.
1582 return true;
1583 default:;
1584 }
1585 if (isSEHInstruction(MI))
1586 return true;
// Finally, treat MI as a boundary when the next instruction in the block is
// a CFI instruction.
1587 auto Next = std::next(MI.getIterator());
1588 return Next != MBB->end() && Next->isCFIInstruction();
1589}
1590
1591 /// analyzeCompare - For a comparison instruction, return the source registers
1592 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1593 /// Return true if the comparison instruction can be analyzed.
// CmpMask is set to ~0 in every case that succeeds. SrcReg2 is 0 for the
// immediate forms.
// NOTE(review): the first line of the signature (function name, `MI` and
// `SrcReg` parameters) is missing from this listing.
1595 Register &SrcReg2, int64_t &CmpMask,
1596 int64_t &CmpValue) const {
1597 // The first operand can be a frame index where we'd normally expect a
1598 // register.
1599 // FIXME: Pass subregisters out of analyzeCompare
1600 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1601 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1602 return false;
1603
1604 switch (MI.getOpcode()) {
1605 default:
1606 break;
1607 case AArch64::PTEST_PP:
1608 case AArch64::PTEST_PP_ANY:
1609 case AArch64::PTEST_PP_FIRST:
1610 SrcReg = MI.getOperand(0).getReg();
1611 SrcReg2 = MI.getOperand(1).getReg();
// NOTE(review): the sources come from operands 0 and 1, but the sub-register
// check inspects operand 2; confirm this is intended.
1612 if (MI.getOperand(2).getSubReg())
1613 return false;
1614
1615 // Not sure about the mask and value for now...
1616 CmpMask = ~0;
1617 CmpValue = 0;
1618 return true;
1619 case AArch64::SUBSWrr:
1620 case AArch64::SUBSWrs:
1621 case AArch64::SUBSWrx:
1622 case AArch64::SUBSXrr:
1623 case AArch64::SUBSXrs:
1624 case AArch64::SUBSXrx:
1625 case AArch64::ADDSWrr:
1626 case AArch64::ADDSWrs:
1627 case AArch64::ADDSWrx:
1628 case AArch64::ADDSXrr:
1629 case AArch64::ADDSXrs:
1630 case AArch64::ADDSXrx:
1631 // Register-register forms: both sources are registers (operands 1 and 2).
1632 SrcReg = MI.getOperand(1).getReg();
1633 SrcReg2 = MI.getOperand(2).getReg();
1634
1635 // FIXME: Pass subregisters out of analyzeCompare
1636 if (MI.getOperand(2).getSubReg())
1637 return false;
1638
1639 CmpMask = ~0;
1640 CmpValue = 0;
1641 return true;
1642 case AArch64::SUBSWri:
1643 case AArch64::ADDSWri:
1644 case AArch64::SUBSXri:
1645 case AArch64::ADDSXri:
// Register-immediate forms: CmpValue is the raw immediate in operand 2.
1646 SrcReg = MI.getOperand(1).getReg();
1647 SrcReg2 = 0;
1648 CmpMask = ~0;
1649 CmpValue = MI.getOperand(2).getImm();
1650 return true;
1651 case AArch64::ANDSWri:
1652 case AArch64::ANDSXri:
1653 // ANDS does not use the same encoding scheme as the others xxxS
1654 // instructions.
1655 SrcReg = MI.getOperand(1).getReg();
1656 SrcReg2 = 0;
1657 CmpMask = ~0;
// NOTE(review): the line that starts the CmpValue assignment (the call whose
// arguments follow) is missing from this listing.
1659 MI.getOperand(2).getImm(),
1660 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1661 return true;
1662 }
1663
1664 return false;
1665}
1666
// NOTE(review): the signature line and the declarations of `TII` and `TRI`
// are missing from this listing; the body reads a MachineInstr named `Instr`.
//
// Re-checks every operand of Instr against the register-class constraint of
// its current descriptor. Physical registers must already be in the class;
// virtual registers are constrained when their class is not a subclass.
// Returns false as soon as one operand cannot satisfy its constraint.
1668 MachineBasicBlock *MBB = Instr.getParent();
1669 assert(MBB && "Can't get MachineBasicBlock here");
1670 MachineFunction *MF = MBB->getParent();
1671 assert(MF && "Can't get MachineFunction here");
1674 MachineRegisterInfo *MRI = &MF->getRegInfo();
1675
1676 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1677 ++OpIdx) {
1678 MachineOperand &MO = Instr.getOperand(OpIdx);
1679 const TargetRegisterClass *OpRegCstraints =
1680 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1681
1682 // If there's no constraint, there's nothing to do.
1683 if (!OpRegCstraints)
1684 continue;
1685 // If the operand is a frame index, there's nothing to do here.
1686 // A frame index operand will resolve correctly during PEI.
1687 if (MO.isFI())
1688 continue;
1689
1690 assert(MO.isReg() &&
1691 "Operand has register constraints without being a register!");
1692
1693 Register Reg = MO.getReg();
1694 if (Reg.isPhysical()) {
1695 if (!OpRegCstraints->contains(Reg))
1696 return false;
// Virtual register: fine if its class is already a subclass of the
// constraint, otherwise try to constrain it.
1697 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1698 !MRI->constrainRegClass(Reg, OpRegCstraints))
1699 return false;
1700 }
1701
1702 return true;
1703}
1704
1705 /// Return the opcode that does not set flags when possible - otherwise
1706 /// return the original opcode. The caller is responsible to do the actual
1707 /// substitution and legality checking.
// NOTE(review): the signature line is missing from this listing; the body
// reads a MachineInstr named `MI`.
//
// The "rr" and "rx" forms always map to their non-S opcode. The "ri" and
// "rs" forms keep the flag-setting opcode when MI defines WZR or XZR.
1709 // Don't convert all compare instructions, because for some the zero register
1710 // encoding becomes the sp register.
1711 bool MIDefinesZeroReg = false;
1712 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1713 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1714 MIDefinesZeroReg = true;
1715
1716 switch (MI.getOpcode()) {
1717 default:
1718 return MI.getOpcode();
1719 case AArch64::ADDSWrr:
1720 return AArch64::ADDWrr;
1721 case AArch64::ADDSWri:
1722 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1723 case AArch64::ADDSWrs:
1724 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1725 case AArch64::ADDSWrx:
1726 return AArch64::ADDWrx;
1727 case AArch64::ADDSXrr:
1728 return AArch64::ADDXrr;
1729 case AArch64::ADDSXri:
1730 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1731 case AArch64::ADDSXrs:
1732 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1733 case AArch64::ADDSXrx:
1734 return AArch64::ADDXrx;
1735 case AArch64::SUBSWrr:
1736 return AArch64::SUBWrr;
1737 case AArch64::SUBSWri:
1738 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1739 case AArch64::SUBSWrs:
1740 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1741 case AArch64::SUBSWrx:
1742 return AArch64::SUBWrx;
1743 case AArch64::SUBSXrr:
1744 return AArch64::SUBXrr;
1745 case AArch64::SUBSXri:
1746 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1747 case AArch64::SUBSXrs:
1748 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1749 case AArch64::SUBSXrx:
1750 return AArch64::SUBXrx;
1751 }
1752}
1753
// Bit mask selecting which kinds of access a scan should look for:
// writes, reads, or both.
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = AK_Write | AK_Read
};
1755
1756 /// True when condition flags are accessed (either by writing or reading)
1757 /// on the instruction trace starting at From and ending at To.
1758 ///
1759 /// Note: If From and To are from different blocks it's assumed CC are accessed
1760 /// on the path.
// AccessToCheck selects whether writes, reads, or both (the default) count
// as an access.
// NOTE(review): the first lines of the signature (function name and the
// `From`/`To` parameters) are missing from this listing.
1763 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1764 // Early exit if To is at the beginning of the BB.
1765 if (To == To->getParent()->begin())
1766 return true;
1767
1768 // Check whether the instructions are in the same basic block
1769 // If not, assume the condition flags might get modified somewhere.
1770 if (To->getParent() != From->getParent())
1771 return true;
1772
1773 // From must be above To.
1774 assert(std::any_of(
1775 ++To.getReverse(), To->getParent()->rend(),
1776 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1777
1778 // We iterate backward starting at \p To until we hit \p From.
// NOTE(review): the range expression of this loop is missing from this
// listing.
1779 for (const MachineInstr &Instr :
1781 if (((AccessToCheck & AK_Write) &&
1782 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1783 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1784 return true;
1785 }
1786 return false;
1787}
1788
1789std::optional<unsigned>
1790AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1791 MachineInstr *Pred,
1792 const MachineRegisterInfo *MRI) const {
1793 unsigned MaskOpcode = Mask->getOpcode();
1794 unsigned PredOpcode = Pred->getOpcode();
1795 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1796 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1797
1798 if (PredIsWhileLike) {
1799 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1800 // instruction and the condition is "any" since WHILcc does an implicit
1801 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1802 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1803 return PredOpcode;
1804
1805 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1806 // redundant since WHILE performs an implicit PTEST with an all active
1807 // mask.
1808 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1809 getElementSizeForOpcode(MaskOpcode) ==
1810 getElementSizeForOpcode(PredOpcode))
1811 return PredOpcode;
1812
1813 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1814 // WHILEcc performs an implicit PTEST with an all active mask, setting
1815 // the N flag as the PTEST_FIRST would.
1816 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1817 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1818 return PredOpcode;
1819
1820 return {};
1821 }
1822
1823 if (PredIsPTestLike) {
1824 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1825 // instruction that sets the flags as PTEST would and the condition is
1826 // "any" since PG is always a subset of the governing predicate of the
1827 // ptest-like instruction.
1828 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1829 return PredOpcode;
1830
1831 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1832
1833 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1834 // to look through a copy and try again. This is because some instructions
1835 // take a predicate whose register class is a subset of its result class.
1836 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1837 PTestLikeMask->getOperand(1).getReg().isVirtual())
1838 PTestLikeMask =
1839 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1840
1841 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1842 // the element size matches and either the PTEST_LIKE instruction uses
1843 // the same all active mask or the condition is "any".
1844 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1845 getElementSizeForOpcode(MaskOpcode) ==
1846 getElementSizeForOpcode(PredOpcode)) {
1847 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1848 return PredOpcode;
1849 }
1850
1851 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1852 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1853 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1854 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1855 // performed by the compare could consider fewer lanes for these element
1856 // sizes.
1857 //
1858 // For example, consider
1859 //
1860 // ptrue p0.b ; P0=1111-1111-1111-1111
1861 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1862 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1863 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1864 // ; ^ last active
1865 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1866 // ; ^ last active
1867 //
1868 // where the compare generates a canonical all active 32-bit predicate
1869 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1870 // active flag, whereas the PTEST instruction with the same mask doesn't.
1871 // For PTEST_ANY this doesn't apply as the flags in this case would be
1872 // identical regardless of element size.
1873 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1874 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1875 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1876 return PredOpcode;
1877
1878 return {};
1879 }
1880
1881 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1882 // opcode so the PTEST becomes redundant.
1883 switch (PredOpcode) {
1884 case AArch64::AND_PPzPP:
1885 case AArch64::BIC_PPzPP:
1886 case AArch64::EOR_PPzPP:
1887 case AArch64::NAND_PPzPP:
1888 case AArch64::NOR_PPzPP:
1889 case AArch64::ORN_PPzPP:
1890 case AArch64::ORR_PPzPP:
1891 case AArch64::BRKA_PPzP:
1892 case AArch64::BRKPA_PPzPP:
1893 case AArch64::BRKB_PPzP:
1894 case AArch64::BRKPB_PPzPP:
1895 case AArch64::RDFFR_PPz: {
1896 // Check to see if our mask is the same. If not the resulting flag bits
1897 // may be different and we can't remove the ptest.
1898 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1899 if (Mask != PredMask)
1900 return {};
1901 break;
1902 }
1903 case AArch64::BRKN_PPzP: {
1904 // BRKN uses an all active implicit mask to set flags unlike the other
1905 // flag-setting instructions.
1906 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1907 if ((MaskOpcode != AArch64::PTRUE_B) ||
1908 (Mask->getOperand(1).getImm() != 31))
1909 return {};
1910 break;
1911 }
1912 case AArch64::PTRUE_B:
1913 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1914 break;
1915 default:
1916 // Bail out if we don't recognize the input
1917 return {};
1918 }
1919
1920 return convertToFlagSettingOpc(PredOpcode);
1921}
1922
1923/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1924/// operation which could set the flags in an identical manner
1925bool AArch64InstrInfo::optimizePTestInstr(
1926 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1927 const MachineRegisterInfo *MRI) const {
1928 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1929 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1930
1931 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1932 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1933 // before the branch to extract each subregister.
1934 auto Op = Pred->getOperand(1);
1935 if (Op.isReg() && Op.getReg().isVirtual() &&
1936 Op.getSubReg() == AArch64::psub0)
1937 Pred = MRI->getUniqueVRegDef(Op.getReg());
1938 }
1939
1940 unsigned PredOpcode = Pred->getOpcode();
1941 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1942 if (!NewOp)
1943 return false;
1944
1945 const TargetRegisterInfo *TRI = &getRegisterInfo();
1946
1947 // If another instruction between Pred and PTest accesses flags, don't remove
1948 // the ptest or update the earlier instruction to modify them.
1949 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1950 return false;
1951
1952 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1953 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1954 // operand to be replaced with an equivalent instruction that also sets the
1955 // flags.
1956 PTest->eraseFromParent();
1957 if (*NewOp != PredOpcode) {
1958 Pred->setDesc(get(*NewOp));
1959 bool succeeded = UpdateOperandRegClass(*Pred);
1960 (void)succeeded;
1961 assert(succeeded && "Operands have incompatible register classes!");
1962 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1963 }
1964
1965 // Ensure that the flags def is live.
1966 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1967 unsigned i = 0, e = Pred->getNumOperands();
1968 for (; i != e; ++i) {
1969 MachineOperand &MO = Pred->getOperand(i);
1970 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1971 MO.setIsDead(false);
1972 break;
1973 }
1974 }
1975 }
1976 return true;
1977}
1978
1979 /// Try to optimize a compare instruction. A compare instruction is an
1980 /// instruction which produces AArch64::NZCV. It is a true compare
1981 /// instruction
1982 /// when there are no uses of its destination register.
1983 ///
1984 /// The following steps are tried in order:
1985 /// 1. Convert CmpInstr into a non-flag-setting version when its NZCV def is dead.
1986 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1987 /// condition code or an instruction which can be converted into such an
1988 /// instruction.
1989 /// Only comparison with zero (or, for removal, one) is supported.
// NOTE(review): the first line of the signature (return type and function
// name) is missing from this listing.
1991 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1992 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1993 assert(CmpInstr.getParent());
1994 assert(MRI);
1995
1996 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1997 int DeadNZCVIdx =
1998 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1999 if (DeadNZCVIdx != -1) {
// Flags are dead and the result goes to the zero register, so the
// instruction is erased outright.
2000 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
2001 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
2002 CmpInstr.eraseFromParent();
2003 return true;
2004 }
2005 unsigned Opc = CmpInstr.getOpcode();
2006 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
2007 if (NewOpc == Opc)
2008 return false;
2009 const MCInstrDesc &MCID = get(NewOpc);
2010 CmpInstr.setDesc(MCID);
2011 CmpInstr.removeOperand(DeadNZCVIdx);
2012 bool succeeded = UpdateOperandRegClass(CmpInstr);
2013 (void)succeeded;
2014 assert(succeeded && "Some operands reg class are incompatible!");
2015 return true;
2016 }
2017
// PTEST variants are handled separately; here SrcReg/SrcReg2 are passed on
// as the mask and predicate registers.
2018 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
2019 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
2020 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
2021 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
2022
// Only compares against an immediate (no second source register) continue.
2023 if (SrcReg2 != 0)
2024 return false;
2025
2026 // CmpInstr is a Compare instruction if destination register is not used.
2027 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
2028 return false;
2029
2030 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
2031 return true;
2032 return (CmpValue == 0 || CmpValue == 1) &&
2033 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
2034}
2035
2036/// Get opcode of S version of Instr.
2037/// If Instr is S version its opcode is returned.
2038/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
2039/// or we are not interested in it.
2040static unsigned sForm(MachineInstr &Instr) {
2041 switch (Instr.getOpcode()) {
2042 default:
2043 return AArch64::INSTRUCTION_LIST_END;
2044
2045 case AArch64::ADDSWrr:
2046 case AArch64::ADDSWri:
2047 case AArch64::ADDSXrr:
2048 case AArch64::ADDSXri:
2049 case AArch64::ADDSWrx:
2050 case AArch64::ADDSXrx:
2051 case AArch64::SUBSWrr:
2052 case AArch64::SUBSWri:
2053 case AArch64::SUBSWrx:
2054 case AArch64::SUBSXrr:
2055 case AArch64::SUBSXri:
2056 case AArch64::SUBSXrx:
2057 case AArch64::ANDSWri:
2058 case AArch64::ANDSWrr:
2059 case AArch64::ANDSWrs:
2060 case AArch64::ANDSXri:
2061 case AArch64::ANDSXrr:
2062 case AArch64::ANDSXrs:
2063 case AArch64::BICSWrr:
2064 case AArch64::BICSXrr:
2065 case AArch64::BICSWrs:
2066 case AArch64::BICSXrs:
2067 return Instr.getOpcode();
2068
2069 case AArch64::ADDWrr:
2070 return AArch64::ADDSWrr;
2071 case AArch64::ADDWri:
2072 return AArch64::ADDSWri;
2073 case AArch64::ADDXrr:
2074 return AArch64::ADDSXrr;
2075 case AArch64::ADDXri:
2076 return AArch64::ADDSXri;
2077 case AArch64::ADDWrx:
2078 return AArch64::ADDSWrx;
2079 case AArch64::ADDXrx:
2080 return AArch64::ADDSXrx;
2081 case AArch64::ADCWr:
2082 return AArch64::ADCSWr;
2083 case AArch64::ADCXr:
2084 return AArch64::ADCSXr;
2085 case AArch64::SUBWrr:
2086 return AArch64::SUBSWrr;
2087 case AArch64::SUBWri:
2088 return AArch64::SUBSWri;
2089 case AArch64::SUBXrr:
2090 return AArch64::SUBSXrr;
2091 case AArch64::SUBXri:
2092 return AArch64::SUBSXri;
2093 case AArch64::SUBWrx:
2094 return AArch64::SUBSWrx;
2095 case AArch64::SUBXrx:
2096 return AArch64::SUBSXrx;
2097 case AArch64::SBCWr:
2098 return AArch64::SBCSWr;
2099 case AArch64::SBCXr:
2100 return AArch64::SBCSXr;
2101 case AArch64::ANDWri:
2102 return AArch64::ANDSWri;
2103 case AArch64::ANDXri:
2104 return AArch64::ANDSXri;
2105 case AArch64::ANDWrr:
2106 return AArch64::ANDSWrr;
2107 case AArch64::ANDWrs:
2108 return AArch64::ANDSWrs;
2109 case AArch64::ANDXrr:
2110 return AArch64::ANDSXrr;
2111 case AArch64::ANDXrs:
2112 return AArch64::ANDSXrs;
2113 case AArch64::BICWrr:
2114 return AArch64::BICSWrr;
2115 case AArch64::BICXrr:
2116 return AArch64::BICSXrr;
2117 case AArch64::BICWrs:
2118 return AArch64::BICSWrs;
2119 case AArch64::BICXrs:
2120 return AArch64::BICSXrs;
2121 }
2122}
2123
2124/// Check if AArch64::NZCV should be alive in successors of MBB.
/// Returns true as soon as one successor of MBB lists NZCV as a live-in, and
/// false when none does.
2126 for (auto *BB : MBB->successors())
2127 if (BB->isLiveIn(AArch64::NZCV))
2128 return true;
2129 return false;
2130}
2131
2132/// \returns The condition code operand index for \p Instr if it is a branch
2133/// or select and -1 otherwise.
2134int AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(
2135 const MachineInstr &Instr) {
2136 switch (Instr.getOpcode()) {
2137 default:
2138 return -1;
2139
2140 case AArch64::Bcc: {
2141 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2142 assert(Idx >= 2);
2143 return Idx - 2;
2144 }
2145
2146 case AArch64::CSINVWr:
2147 case AArch64::CSINVXr:
2148 case AArch64::CSINCWr:
2149 case AArch64::CSINCXr:
2150 case AArch64::CSELWr:
2151 case AArch64::CSELXr:
2152 case AArch64::CSNEGWr:
2153 case AArch64::CSNEGXr:
2154 case AArch64::FCSELSrrr:
2155 case AArch64::FCSELDrrr: {
2156 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2157 assert(Idx >= 1);
2158 return Idx - 1;
2159 }
2160 }
2161}
2162
2163/// Find a condition code used by the instruction.
2164/// Returns AArch64CC::Invalid if either the instruction does not use condition
2165/// codes or we don't optimize CmpInstr in the presence of such instructions.
 // The operand index comes from findCondCodeUseOperandIdxForBranchOrSelect; a
 // non-negative index names the immediate operand holding the condition code.
2167 int CCIdx =
2168 AArch64InstrInfo::findCondCodeUseOperandIdxForBranchOrSelect(Instr);
2169 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2170 Instr.getOperand(CCIdx).getImm())
2172}
2173
// Translate the condition code CC into the set of NZCV flags that evaluating
// it reads. Codes not listed below leave UsedFlags as default-constructed.
// NOTE(review): presumably the body of getUsedNZCV; the declarator line is not
// visible in this listing.
2176 UsedNZCV UsedFlags;
2177 switch (CC) {
2178 default:
2179 break;
2180
2181 case AArch64CC::EQ: // Z set
2182 case AArch64CC::NE: // Z clear
2183 UsedFlags.Z = true;
2184 break;
2185
 // HI/LS read Z and then share the C handling of HS/LO.
2186 case AArch64CC::HI: // Z clear and C set
2187 case AArch64CC::LS: // Z set or C clear
2188 UsedFlags.Z = true;
2189 [[fallthrough]];
2190 case AArch64CC::HS: // C set
2191 case AArch64CC::LO: // C clear
2192 UsedFlags.C = true;
2193 break;
2194
2195 case AArch64CC::MI: // N set
2196 case AArch64CC::PL: // N clear
2197 UsedFlags.N = true;
2198 break;
2199
2200 case AArch64CC::VS: // V set
2201 case AArch64CC::VC: // V clear
2202 UsedFlags.V = true;
2203 break;
2204
 // GT/LE read Z and then share the N/V handling of GE/LT.
2205 case AArch64CC::GT: // Z clear, N and V the same
2206 case AArch64CC::LE: // Z set, N and V differ
2207 UsedFlags.Z = true;
2208 [[fallthrough]];
2209 case AArch64CC::GE: // N and V the same
2210 case AArch64CC::LT: // N and V differ
2211 UsedFlags.N = true;
2212 UsedFlags.V = true;
2213 break;
2214 }
2215 return UsedFlags;
2216}
2217
2218/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
2219/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
2220/// \returns std::nullopt otherwise.
2221///
2222/// Collect instructions using those flags in \p CCUseInstrs if provided.
2223std::optional<UsedNZCV>
2225 const TargetRegisterInfo &TRI,
2226 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2227 MachineBasicBlock *CmpParent = CmpInstr.getParent();
 // MI and CmpInstr must live in the same basic block.
2228 if (MI.getParent() != CmpParent)
2229 return std::nullopt;
2230
 // Flags that are live into a successor cannot be analysed locally.
2231 if (areCFlagsAliveInSuccessors(CmpParent))
2232 return std::nullopt;
2233
2234 UsedNZCV NZCVUsedAfterCmp;
 // Scan the instructions following CmpInstr up to the end of the block.
2236 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2237 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2239 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2240 return std::nullopt;
2241 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2242 if (CCUseInstrs)
2243 CCUseInstrs->push_back(&Instr);
2244 }
 // The first instruction that writes NZCV ends the scan; it is still counted
 // as a user above if it also reads the flags.
2245 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2246 break;
2247 }
2248 return NZCVUsedAfterCmp;
2249}
2250
2251static bool isADDSRegImm(unsigned Opcode) {
2252 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2253}
2254
2255static bool isSUBSRegImm(unsigned Opcode) {
2256 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2257}
2258
// Returns true when the S form of MI (as computed by sForm) is one of the ANDS
// or BICS opcodes listed below, so both the flag-setting and the plain AND/BIC
// opcodes known to sForm qualify.
// NOTE(review): presumably the body of isANDOpcode; the declarator line is not
// visible in this listing.
2260 unsigned Opc = sForm(MI);
2261 switch (Opc) {
2262 case AArch64::ANDSWri:
2263 case AArch64::ANDSWrr:
2264 case AArch64::ANDSWrs:
2265 case AArch64::ANDSXri:
2266 case AArch64::ANDSXrr:
2267 case AArch64::ANDSXrs:
2268 case AArch64::BICSWrr:
2269 case AArch64::BICSXrr:
2270 case AArch64::BICSWrs:
2271 case AArch64::BICSXrs:
2272 return true;
2273 default:
2274 return false;
2275 }
2276}
2277
2278/// Check if CmpInstr can be substituted by MI.
2279///
2280/// CmpInstr can be substituted:
2281/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2282/// - and, MI and CmpInstr are from the same MachineBB
2283/// - and, condition flags are not alive in successors of the CmpInstr parent
2284/// - and, if MI opcode is the S form there must be no defs of flags between
2285/// MI and CmpInstr
2286/// or if MI opcode is not the S form there must be neither defs of flags
2287/// nor uses of flags between MI and CmpInstr.
2288/// - and, the C flag is not used after CmpInstr
2289/// - and, the V flag is not used after CmpInstr, unless MI carries the
2290/// no-signed-wrap flag or is an AND/BIC form (see isANDOpcode).
2292 const TargetRegisterInfo &TRI) {
2293 // NOTE this assertion guarantees that MI.getOpcode() is one of the opcodes
2294 // sForm() knows about, whether or not it already sets flags.
2295 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2296
2297 const unsigned CmpOpcode = CmpInstr.getOpcode();
2298 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2299 return false;
2300
2301 assert((CmpInstr.getOperand(2).isImm() &&
2302 CmpInstr.getOperand(2).getImm() == 0) &&
2303 "Caller guarantees that CmpInstr compares with constant 0");
2304
 // A missing result means the flag uses could not be analysed; any use of C
 // blocks the substitution.
2305 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2306 if (!NZVCUsed || NZVCUsed->C)
2307 return false;
2308
2309 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
2310 // '%vreg = add ...' or '%vreg = sub ...'.
2311 // Condition flag V is used to indicate signed overflow.
2312 // 1) MI and CmpInstr set N and V to the same value.
2313 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
2314 // signed overflow occurs, so CmpInstr could still be simplified away.
2315 // Note that Ands and Bics instructions always clear the V flag.
2316 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2317 return false;
2318
 // MI already in S form: only intervening flag writes matter. Otherwise MI
 // will start writing flags, so intervening reads matter as well.
2319 AccessKind AccessToCheck = AK_Write;
2320 if (sForm(MI) != MI.getOpcode())
2321 AccessToCheck = AK_All;
2322 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2323}
2324
2325/// Substitute an instruction comparing to zero with another instruction
2326/// which produces needed condition flags.
2327///
/// The unique definition of \p SrcReg is switched to its S form (see sForm) and
/// \p CmpInstr is erased.
///
2328/// Return true on success.
2329bool AArch64InstrInfo::substituteCmpToZero(
2330 MachineInstr &CmpInstr, unsigned SrcReg,
2331 const MachineRegisterInfo &MRI) const {
2332 // Get the unique definition of SrcReg.
2333 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2334 if (!MI)
2335 return false;
2336
2337 const TargetRegisterInfo &TRI = getRegisterInfo();
2338
 // The defining instruction must have an S form we know about.
2339 unsigned NewOpc = sForm(*MI);
2340 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2341 return false;
2342
2343 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2344 return false;
2345
2346 // Update the instruction to set NZCV.
2347 MI->setDesc(get(NewOpc));
2348 CmpInstr.eraseFromParent();
2350 (void)succeeded;
2351 assert(succeeded && "Some operands reg class are incompatible!");
2352 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2353 return true;
2354}
2355
2356/// \returns True if \p CmpInstr can be removed.
2357///
2358/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2359/// codes used in \p CCUseInstrs must be inverted.
/// \p IsInvertCC is only written on the path that returns true.
2361 int CmpValue, const TargetRegisterInfo &TRI,
2363 bool &IsInvertCC) {
2364 assert((CmpValue == 0 || CmpValue == 1) &&
2365 "Only comparisons to 0 or 1 considered for removal!");
2366
2367 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2368 unsigned MIOpc = MI.getOpcode();
2369 if (MIOpc == AArch64::CSINCWr) {
2370 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2371 MI.getOperand(2).getReg() != AArch64::WZR)
2372 return false;
2373 } else if (MIOpc == AArch64::CSINCXr) {
2374 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2375 MI.getOperand(2).getReg() != AArch64::XZR)
2376 return false;
2377 } else {
2378 return false;
2379 }
2381 if (MICC == AArch64CC::Invalid)
2382 return false;
2383
2384 // NZCV needs to be defined
 // NOTE(review): the query passes isDead=true (compare the DeadNZCVIdx lookup
 // in optimizeCompareInstr), so this rejects an MI carrying a dead NZCV def --
 // confirm that the wording above matches the intent.
2385 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2386 return false;
2387
2388 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2389 const unsigned CmpOpcode = CmpInstr.getOpcode();
2390 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2391 if (CmpValue && !IsSubsRegImm)
2392 return false;
2393 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2394 return false;
2395
2396 // MI conditions allowed: eq, ne, mi, pl
2397 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2398 if (MIUsedNZCV.C || MIUsedNZCV.V)
2399 return false;
2400
2401 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2402 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2403 // Condition flags are not used in CmpInstr basic block successors and only
2404 // Z or N flags allowed to be used after CmpInstr within its basic block
2405 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2406 return false;
2407 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2408 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2409 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2410 return false;
2411 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2412 if (MIUsedNZCV.N && !CmpValue)
2413 return false;
2414
2415 // There must be no defs of flags between MI and CmpInstr
2416 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2417 return false;
2418
2419 // Condition code is inverted in the following cases:
2420 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2421 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2422 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2423 (!CmpValue && MICC == AArch64CC::NE);
2424 return true;
2425}
2426
2427/// Remove comparison in csinc-cmp sequence
2428///
2429/// Examples:
2430/// 1. \code
2431/// csinc w9, wzr, wzr, ne
2432/// cmp w9, #0
2433/// b.eq
2434/// \endcode
2435/// to
2436/// \code
2437/// csinc w9, wzr, wzr, ne
2438/// b.ne
2439/// \endcode
2440///
2441/// 2. \code
2442/// csinc x2, xzr, xzr, mi
2443/// cmp x2, #1
2444/// b.pl
2445/// \endcode
2446/// to
2447/// \code
2448/// csinc x2, xzr, xzr, mi
2449/// b.pl
2450/// \endcode
2451///
2452/// \param CmpInstr comparison instruction
/// \param SrcReg register compared by \p CmpInstr; its unique definition is
/// the candidate csinc
/// \param CmpValue constant compared against (0 or 1, asserted by
/// canCmpInstrBeRemoved)
2453/// \return True when comparison removed
2454bool AArch64InstrInfo::removeCmpToZeroOrOne(
2455 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2456 const MachineRegisterInfo &MRI) const {
2457 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2458 if (!MI)
2459 return false;
2460 const TargetRegisterInfo &TRI = getRegisterInfo();
 // Filled by canCmpInstrBeRemoved with the users of the flags after CmpInstr.
2461 SmallVector<MachineInstr *, 4> CCUseInstrs;
2462 bool IsInvertCC = false;
2463 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2464 IsInvertCC))
2465 return false;
2466 // Make transformation
2467 CmpInstr.eraseFromParent();
2468 if (IsInvertCC) {
2469 // Invert condition codes in CmpInstr CC users
2470 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2471 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2472 assert(Idx >= 0 && "Unexpected instruction using CC.");
2473 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2475 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2476 CCOperand.setImm(CCUse);
2477 }
2478 }
2479 return true;
2480}
2481
/// Expand the LOAD_STACK_GUARD and CATCHRET pseudo instructions.
///
/// \returns false, leaving \p MI untouched, for any other opcode; true once
/// the expansion has been emitted.
2482bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2483 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2484 MI.getOpcode() != AArch64::CATCHRET)
2485 return false;
2486
2487 MachineBasicBlock &MBB = *MI.getParent();
2488 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2489 auto TRI = Subtarget.getRegisterInfo();
2490 DebugLoc DL = MI.getDebugLoc();
2491
 // CATCHRET: build the address of the target block in X0 with ADRP + ADDXri.
 // MI itself is not erased on this path.
2492 if (MI.getOpcode() == AArch64::CATCHRET) {
2493 // Skip to the first instruction before the epilog.
2494 const TargetInstrInfo *TII =
2496 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
 // Walk backwards over instructions flagged FrameDestroy, stopping at the
 // start of the block.
2498 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2499 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2500 FirstEpilogSEH != MBB.begin())
2501 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2502 if (FirstEpilogSEH != MBB.begin())
2503 FirstEpilogSEH = std::next(FirstEpilogSEH);
2504 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2505 .addReg(AArch64::X0, RegState::Define)
2506 .addMBB(TargetMBB);
2507 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2508 .addReg(AArch64::X0, RegState::Define)
2509 .addReg(AArch64::X0)
2510 .addMBB(TargetMBB)
2511 .addImm(0);
2512 TargetMBB->setMachineBlockAddressTaken();
2513 return true;
2514 }
2515
 // LOAD_STACK_GUARD from here on. Reg is the destination of the pseudo.
2516 Register Reg = MI.getOperand(0).getReg();
 // Guard configured as "sysreg": read the system register, then load through
 // it using the instruction form that can encode the configured offset.
2518 if (M.getStackProtectorGuard() == "sysreg") {
2519 const AArch64SysReg::SysReg *SrcReg =
2520 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2521 if (!SrcReg)
2522 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2523
2524 // mrs xN, sysreg
2525 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2527 .addImm(SrcReg->Encoding);
2528 int Offset = M.getStackProtectorGuardOffset();
2529 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2530 // ldr xN, [xN, #offset]
2531 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2532 .addDef(Reg)
2534 .addImm(Offset / 8);
2535 } else if (Offset >= -256 && Offset <= 255) {
2536 // ldur xN, [xN, #offset]
2537 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2538 .addDef(Reg)
2540 .addImm(Offset);
2541 } else if (Offset >= -4095 && Offset <= 4095) {
2542 if (Offset > 0) {
2543 // add xN, xN, #offset
2544 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2545 .addDef(Reg)
2547 .addImm(Offset)
2548 .addImm(0);
2549 } else {
2550 // sub xN, xN, #-offset
2551 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2552 .addDef(Reg)
2554 .addImm(-Offset)
2555 .addImm(0);
2556 }
2557 // ldr xN, [xN]
2558 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2559 .addDef(Reg)
2561 .addImm(0);
2562 } else {
2563 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2564 // than 32760.
2565 // It might be nice to use AArch64::MOVi32imm here, which would get
2566 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2567 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2568 // AArch64FrameLowering might help us find such a scratch register
2569 // though. If we failed to find a scratch register, we could emit a
2570 // stream of add instructions to build up the immediate. Or, we could try
2571 // to insert a AArch64::MOVi32imm before register allocation so that we
2572 // didn't need to scavenge for a scratch register.
2573 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2574 }
2575 MBB.erase(MI);
2576 return true;
2577 }
2578
 // Otherwise the guard is the global value recorded in MI's first memory
 // operand; how its address is formed depends on the reference classification
 // and the code model.
2579 const GlobalValue *GV =
2580 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2581 const TargetMachine &TM = MBB.getParent()->getTarget();
2582 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2583 const unsigned char MO_NC = AArch64II::MO_NC;
2584
 // Width defaults to 4 on ILP32 targets and 8 otherwise; nothing else is
 // accepted.
2585 unsigned GuardWidth = M.getStackProtectorGuardValueWidth().value_or(
2586 Subtarget.isTargetILP32() ? 4 : 8);
2587 if (GuardWidth != 4 && GuardWidth != 8)
2588 report_fatal_error("Unsupported stack protector value width");
2589 if ((OpFlags & AArch64II::MO_GOT) != 0) {
 // GOT reference: LOADgot for the address, then LDRWui or LDRXui by width.
2590 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2591 .addGlobalAddress(GV, 0, OpFlags);
2592 if (GuardWidth == 4) {
2593 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2594 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2595 .addDef(Reg32, RegState::Dead)
2597 .addImm(0)
2598 .addMemOperand(*MI.memoperands_begin())
2600 } else {
2601 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2603 .addImm(0)
2604 .addMemOperand(*MI.memoperands_begin());
2605 }
2606 } else if (TM.getCodeModel() == CodeModel::Large) {
 // Large code model: MOVZ plus three MOVK (shifts 0/16/32/48), then LDRXui.
2607 if (GuardWidth == 4)
2608 report_fatal_error("Large code model with 4-byte stack protector not yet "
2609 "supported");
2610 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2611 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2612 .addImm(0);
2613 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2615 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2616 .addImm(16);
2617 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2619 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2620 .addImm(32);
2621 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2624 .addImm(48);
2625 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2627 .addImm(0)
2628 .addMemOperand(*MI.memoperands_begin());
2629 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2630 // FIXME: This is computing the stack protector value as a constant
2631 // pc-relative offset, not loading it from memory. Which is maybe
2632 // an interesting compromise in some environments, but it looks like it
2633 // was done accidentally. And it probably shouldn't be tied to the
2634 // code model.
2635 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2636 .addGlobalAddress(GV, 0, OpFlags);
2637 } else {
 // Default: ADRP with MO_PAGE, then a load using MO_PAGEOFF | MO_NC.
2638 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2639 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2640 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2641 if (GuardWidth == 4) {
2642 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2643 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2644 .addDef(Reg32, RegState::Dead)
2646 .addGlobalAddress(GV, 0, LoFlags)
2647 .addMemOperand(*MI.memoperands_begin())
2649 } else {
2650 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2652 .addGlobalAddress(GV, 0, LoFlags)
2653 .addMemOperand(*MI.memoperands_begin());
2654 }
2655 }
2656
 // The pseudo has been fully replaced.
2657 MBB.erase(MI);
2658
2659 return true;
2660}
2661
2662// Return true if this instruction simply sets its single destination register
2663// to zero. This is equivalent to a register rename of the zero-register.
// Recognized forms: MOVZ with a zero immediate, AND-immediate reading WZR/XZR,
// and COPY from WZR.
2665 switch (MI.getOpcode()) {
2666 default:
2667 break;
2668 case AArch64::MOVZWi:
2669 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2670 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2671 assert(MI.getDesc().getNumOperands() == 3 &&
2672 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2673 return true;
2674 }
2675 break;
2676 case AArch64::ANDWri: // and Rd, Rzr, #imm
2677 return MI.getOperand(1).getReg() == AArch64::WZR;
2678 case AArch64::ANDXri:
2679 return MI.getOperand(1).getReg() == AArch64::XZR;
 // Only a copy from WZR is matched here; XZR is not checked.
2680 case TargetOpcode::COPY:
2681 return MI.getOperand(1).getReg() == AArch64::WZR;
2682 }
2683 return false;
2684}
2685
2686// Return true if this instruction simply renames a general register without
2687// modifying bits.
// Recognized forms: COPY into GPR32/GPR64, ORRXrs reading XZR, and ADDXri with
// a zero immediate.
2689 switch (MI.getOpcode()) {
2690 default:
2691 break;
2692 case TargetOpcode::COPY: {
2693 // GPR32 copies will be lowered to ORRXrs
2694 Register DstReg = MI.getOperand(0).getReg();
2695 return (AArch64::GPR32RegClass.contains(DstReg) ||
2696 AArch64::GPR64RegClass.contains(DstReg));
2697 }
2698 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2699 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2700 assert(MI.getDesc().getNumOperands() == 4 &&
2701 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2702 return true;
2703 }
2704 break;
2705 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2706 if (MI.getOperand(2).getImm() == 0) {
2707 assert(MI.getDesc().getNumOperands() == 4 &&
2708 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2709 return true;
2710 }
2711 break;
2712 }
2713 return false;
2714}
2715
2716// Return true if this instruction simply renames an FPR128 register without
2717// modifying bits.
// Recognized forms: COPY into FPR128, and ORRv16i8 whose two source operands
// are the same register.
2719 switch (MI.getOpcode()) {
2720 default:
2721 break;
2722 case TargetOpcode::COPY: {
2723 Register DstReg = MI.getOperand(0).getReg();
2724 return AArch64::FPR128RegClass.contains(DstReg);
2725 }
2726 case AArch64::ORRv16i8:
2727 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2728 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2729 "invalid ORRv16i8 operands");
2730 return true;
2731 }
2732 break;
2733 }
2734 return false;
2735}
2736
2737static bool isFrameLoadOpcode(int Opcode) {
2738 switch (Opcode) {
2739 default:
2740 return false;
2741 case AArch64::LDRWui:
2742 case AArch64::LDRXui:
2743 case AArch64::LDRBui:
2744 case AArch64::LDRHui:
2745 case AArch64::LDRSui:
2746 case AArch64::LDRDui:
2747 case AArch64::LDRQui:
2748 case AArch64::LDR_PXI:
2749 return true;
2750 }
2751}
2752
// Recognize a direct load from a stack slot: one of the isFrameLoadOpcode
// opcodes whose operand 0 has no sub-register, operand 1 is a frame index and
// operand 2 is the immediate 0. On a match FrameIndex receives the slot and
// the register of operand 0 is returned; otherwise an empty Register is
// returned and FrameIndex is left untouched.
2754 int &FrameIndex) const {
2755 if (!isFrameLoadOpcode(MI.getOpcode()))
2756 return Register();
2757
2758 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2759 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2760 FrameIndex = MI.getOperand(1).getIndex();
2761 return MI.getOperand(0).getReg();
2762 }
2763 return Register();
2764}
2765
2766static bool isFrameStoreOpcode(int Opcode) {
2767 switch (Opcode) {
2768 default:
2769 return false;
2770 case AArch64::STRWui:
2771 case AArch64::STRXui:
2772 case AArch64::STRBui:
2773 case AArch64::STRHui:
2774 case AArch64::STRSui:
2775 case AArch64::STRDui:
2776 case AArch64::STRQui:
2777 case AArch64::STR_PXI:
2778 return true;
2779 }
2780}
2781
// Recognize a direct store to a stack slot: one of the isFrameStoreOpcode
// opcodes whose operand 0 has no sub-register, operand 1 is a frame index and
// operand 2 is the immediate 0. On a match FrameIndex receives the slot and
// the register of operand 0 is returned; otherwise an empty Register is
// returned and FrameIndex is left untouched.
2783 int &FrameIndex) const {
2784 if (!isFrameStoreOpcode(MI.getOpcode()))
2785 return Register();
2786
2787 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2788 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2789 FrameIndex = MI.getOperand(1).getIndex();
2790 return MI.getOperand(0).getReg();
2791 }
2792 return Register();
2793}
2794
// Like isStoreToStackSlot, but when the strict operand pattern does not match
// it falls back to what hasStoreToStackSlot reports in Accesses. The fallback
// only succeeds when exactly one access was collected.
2796 int &FrameIndex) const {
2797 if (!isFrameStoreOpcode(MI.getOpcode()))
2798 return Register();
2799
 // Prefer the direct frame-index form.
2800 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2801 return Reg;
2802
2804 if (hasStoreToStackSlot(MI, Accesses)) {
 // More than one stack access cannot be described by a single FrameIndex.
2805 if (Accesses.size() > 1)
2806 return Register();
2807
2808 FrameIndex =
2809 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2810 ->getFrameIndex();
2811 return MI.getOperand(0).getReg();
2812 }
2813 return Register();
2814}
2815
// Like isLoadFromStackSlot, but when the strict operand pattern does not match
// it falls back to what hasLoadFromStackSlot reports in Accesses. The fallback
// only succeeds when exactly one access was collected.
2817 int &FrameIndex) const {
2818 if (!isFrameLoadOpcode(MI.getOpcode()))
2819 return Register();
2820
 // Prefer the direct frame-index form.
2821 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2822 return Reg;
2823
2825 if (hasLoadFromStackSlot(MI, Accesses)) {
 // More than one stack access cannot be described by a single FrameIndex.
2826 if (Accesses.size() > 1)
2827 return Register();
2828
2829 FrameIndex =
2830 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2831 ->getFrameIndex();
2832 return MI.getOperand(0).getReg();
2833 }
2834 return Register();
2835}
2836
2837/// Check all MachineMemOperands for a hint to suppress pairing.
/// Returns true if any memory operand of MI has the MOSuppressPair flag set;
/// an instruction without memory operands yields false.
2839 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2840 return MMO->getFlags() & MOSuppressPair;
2841 });
2842}
2843
2844/// Set a flag on the first MachineMemOperand to suppress pairing.
/// Does nothing when MI has no memory operands, so the hint is not recorded in
/// that case.
2846 if (MI.memoperands_empty())
2847 return;
2848 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2849}
2850
2851/// Check all MachineMemOperands for a hint that the load/store is strided.
/// Returns true if any memory operand of MI has the MOStridedAccess flag set;
/// an instruction without memory operands yields false.
2853 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2854 return MMO->getFlags() & MOStridedAccess;
2855 });
2856}
2857
// Returns true when Opc is one of the LDUR*/STUR* or pre-indexed (*pre)
// load/store opcodes listed below, and false for every other opcode.
// NOTE(review): presumably the body of hasUnscaledLdStOffset; the declarator
// line is not visible in this listing.
2859 switch (Opc) {
2860 default:
2861 return false;
2862 case AArch64::STURSi:
2863 case AArch64::STRSpre:
2864 case AArch64::STURDi:
2865 case AArch64::STRDpre:
2866 case AArch64::STURQi:
2867 case AArch64::STRQpre:
2868 case AArch64::STURBBi:
2869 case AArch64::STURHHi:
2870 case AArch64::STURWi:
2871 case AArch64::STRWpre:
2872 case AArch64::STURXi:
2873 case AArch64::STRXpre:
2874 case AArch64::LDURSi:
2875 case AArch64::LDRSpre:
2876 case AArch64::LDURDi:
2877 case AArch64::LDRDpre:
2878 case AArch64::LDURQi:
2879 case AArch64::LDRQpre:
2880 case AArch64::LDURWi:
2881 case AArch64::LDRWpre:
2882 case AArch64::LDURXi:
2883 case AArch64::LDRXpre:
2884 case AArch64::LDRSWpre:
2885 case AArch64::LDURSWi:
2886 case AArch64::LDURHHi:
2887 case AArch64::LDURBBi:
2888 case AArch64::LDURSBWi:
2889 case AArch64::LDURSHWi:
2890 return true;
2891 }
2892}
2893
2894std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2895 switch (Opc) {
2896 default: return {};
2897 case AArch64::PRFMui: return AArch64::PRFUMi;
2898 case AArch64::LDRXui: return AArch64::LDURXi;
2899 case AArch64::LDRWui: return AArch64::LDURWi;
2900 case AArch64::LDRBui: return AArch64::LDURBi;
2901 case AArch64::LDRHui: return AArch64::LDURHi;
2902 case AArch64::LDRSui: return AArch64::LDURSi;
2903 case AArch64::LDRDui: return AArch64::LDURDi;
2904 case AArch64::LDRQui: return AArch64::LDURQi;
2905 case AArch64::LDRBBui: return AArch64::LDURBBi;
2906 case AArch64::LDRHHui: return AArch64::LDURHHi;
2907 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2908 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2909 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2910 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2911 case AArch64::LDRSWui: return AArch64::LDURSWi;
2912 case AArch64::STRXui: return AArch64::STURXi;
2913 case AArch64::STRWui: return AArch64::STURWi;
2914 case AArch64::STRBui: return AArch64::STURBi;
2915 case AArch64::STRHui: return AArch64::STURHi;
2916 case AArch64::STRSui: return AArch64::STURSi;
2917 case AArch64::STRDui: return AArch64::STURDi;
2918 case AArch64::STRQui: return AArch64::STURQi;
2919 case AArch64::STRBBui: return AArch64::STURBBi;
2920 case AArch64::STRHHui: return AArch64::STURHHi;
2921 }
2922}
2923
2925 switch (Opc) {
2926 default:
2927 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2928 case AArch64::ADDG:
2929 case AArch64::LDAPURBi:
2930 case AArch64::LDAPURHi:
2931 case AArch64::LDAPURi:
2932 case AArch64::LDAPURSBWi:
2933 case AArch64::LDAPURSBXi:
2934 case AArch64::LDAPURSHWi:
2935 case AArch64::LDAPURSHXi:
2936 case AArch64::LDAPURSWi:
2937 case AArch64::LDAPURXi:
2938 case AArch64::LDR_PPXI:
2939 case AArch64::LDR_PXI:
2940 case AArch64::LDR_ZXI:
2941 case AArch64::LDR_ZZXI:
2942 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2943 case AArch64::LDR_ZZZXI:
2944 case AArch64::LDR_ZZZZXI:
2945 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2946 case AArch64::LDRBBui:
2947 case AArch64::LDRBui:
2948 case AArch64::LDRDui:
2949 case AArch64::LDRHHui:
2950 case AArch64::LDRHui:
2951 case AArch64::LDRQui:
2952 case AArch64::LDRSBWui:
2953 case AArch64::LDRSBXui:
2954 case AArch64::LDRSHWui:
2955 case AArch64::LDRSHXui:
2956 case AArch64::LDRSui:
2957 case AArch64::LDRSWui:
2958 case AArch64::LDRWui:
2959 case AArch64::LDRXui:
2960 case AArch64::LDURBBi:
2961 case AArch64::LDURBi:
2962 case AArch64::LDURDi:
2963 case AArch64::LDURHHi:
2964 case AArch64::LDURHi:
2965 case AArch64::LDURQi:
2966 case AArch64::LDURSBWi:
2967 case AArch64::LDURSBXi:
2968 case AArch64::LDURSHWi:
2969 case AArch64::LDURSHXi:
2970 case AArch64::LDURSi:
2971 case AArch64::LDURSWi:
2972 case AArch64::LDURWi:
2973 case AArch64::LDURXi:
2974 case AArch64::PRFMui:
2975 case AArch64::PRFUMi:
2976 case AArch64::ST2Gi:
2977 case AArch64::STGi:
2978 case AArch64::STLURBi:
2979 case AArch64::STLURHi:
2980 case AArch64::STLURWi:
2981 case AArch64::STLURXi:
2982 case AArch64::StoreSwiftAsyncContext:
2983 case AArch64::STR_PPXI:
2984 case AArch64::STR_PXI:
2985 case AArch64::STR_ZXI:
2986 case AArch64::STR_ZZXI:
2987 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2988 case AArch64::STR_ZZZXI:
2989 case AArch64::STR_ZZZZXI:
2990 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2991 case AArch64::STRBBui:
2992 case AArch64::STRBui:
2993 case AArch64::STRDui:
2994 case AArch64::STRHHui:
2995 case AArch64::STRHui:
2996 case AArch64::STRQui:
2997 case AArch64::STRSui:
2998 case AArch64::STRWui:
2999 case AArch64::STRXui:
3000 case AArch64::STURBBi:
3001 case AArch64::STURBi:
3002 case AArch64::STURDi:
3003 case AArch64::STURHHi:
3004 case AArch64::STURHi:
3005 case AArch64::STURQi:
3006 case AArch64::STURSi:
3007 case AArch64::STURWi:
3008 case AArch64::STURXi:
3009 case AArch64::STZ2Gi:
3010 case AArch64::STZGi:
3011 case AArch64::TAGPstack:
3012 return 2;
3013 case AArch64::LD1B_D_IMM:
3014 case AArch64::LD1B_H_IMM:
3015 case AArch64::LD1B_IMM:
3016 case AArch64::LD1B_S_IMM:
3017 case AArch64::LD1D_IMM:
3018 case AArch64::LD1H_D_IMM:
3019 case AArch64::LD1H_IMM:
3020 case AArch64::LD1H_S_IMM:
3021 case AArch64::LD1RB_D_IMM:
3022 case AArch64::LD1RB_H_IMM:
3023 case AArch64::LD1RB_IMM:
3024 case AArch64::LD1RB_S_IMM:
3025 case AArch64::LD1RD_IMM:
3026 case AArch64::LD1RH_D_IMM:
3027 case AArch64::LD1RH_IMM:
3028 case AArch64::LD1RH_S_IMM:
3029 case AArch64::LD1RSB_D_IMM:
3030 case AArch64::LD1RSB_H_IMM:
3031 case AArch64::LD1RSB_S_IMM:
3032 case AArch64::LD1RSH_D_IMM:
3033 case AArch64::LD1RSH_S_IMM:
3034 case AArch64::LD1RSW_IMM:
3035 case AArch64::LD1RW_D_IMM:
3036 case AArch64::LD1RW_IMM:
3037 case AArch64::LD1SB_D_IMM:
3038 case AArch64::LD1SB_H_IMM:
3039 case AArch64::LD1SB_S_IMM:
3040 case AArch64::LD1SH_D_IMM:
3041 case AArch64::LD1SH_S_IMM:
3042 case AArch64::LD1SW_D_IMM:
3043 case AArch64::LD1W_D_IMM:
3044 case AArch64::LD1W_IMM:
3045 case AArch64::LD2B_IMM:
3046 case AArch64::LD2D_IMM:
3047 case AArch64::LD2H_IMM:
3048 case AArch64::LD2W_IMM:
3049 case AArch64::LD3B_IMM:
3050 case AArch64::LD3D_IMM:
3051 case AArch64::LD3H_IMM:
3052 case AArch64::LD3W_IMM:
3053 case AArch64::LD4B_IMM:
3054 case AArch64::LD4D_IMM:
3055 case AArch64::LD4H_IMM:
3056 case AArch64::LD4W_IMM:
3057 case AArch64::LDG:
3058 case AArch64::LDNF1B_D_IMM:
3059 case AArch64::LDNF1B_H_IMM:
3060 case AArch64::LDNF1B_IMM:
3061 case AArch64::LDNF1B_S_IMM:
3062 case AArch64::LDNF1D_IMM:
3063 case AArch64::LDNF1H_D_IMM:
3064 case AArch64::LDNF1H_IMM:
3065 case AArch64::LDNF1H_S_IMM:
3066 case AArch64::LDNF1SB_D_IMM:
3067 case AArch64::LDNF1SB_H_IMM:
3068 case AArch64::LDNF1SB_S_IMM:
3069 case AArch64::LDNF1SH_D_IMM:
3070 case AArch64::LDNF1SH_S_IMM:
3071 case AArch64::LDNF1SW_D_IMM:
3072 case AArch64::LDNF1W_D_IMM:
3073 case AArch64::LDNF1W_IMM:
3074 case AArch64::LDNPDi:
3075 case AArch64::LDNPQi:
3076 case AArch64::LDNPSi:
3077 case AArch64::LDNPWi:
3078 case AArch64::LDNPXi:
3079 case AArch64::LDNT1B_ZRI:
3080 case AArch64::LDNT1D_ZRI:
3081 case AArch64::LDNT1H_ZRI:
3082 case AArch64::LDNT1W_ZRI:
3083 case AArch64::LDPDi:
3084 case AArch64::LDPQi:
3085 case AArch64::LDPSi:
3086 case AArch64::LDPWi:
3087 case AArch64::LDPXi:
3088 case AArch64::LDRBBpost:
3089 case AArch64::LDRBBpre:
3090 case AArch64::LDRBpost:
3091 case AArch64::LDRBpre:
3092 case AArch64::LDRDpost:
3093 case AArch64::LDRDpre:
3094 case AArch64::LDRHHpost:
3095 case AArch64::LDRHHpre:
3096 case AArch64::LDRHpost:
3097 case AArch64::LDRHpre:
3098 case AArch64::LDRQpost:
3099 case AArch64::LDRQpre:
3100 case AArch64::LDRSpost:
3101 case AArch64::LDRSpre:
3102 case AArch64::LDRWpost:
3103 case AArch64::LDRWpre:
3104 case AArch64::LDRXpost:
3105 case AArch64::LDRXpre:
3106 case AArch64::ST1B_D_IMM:
3107 case AArch64::ST1B_H_IMM:
3108 case AArch64::ST1B_IMM:
3109 case AArch64::ST1B_S_IMM:
3110 case AArch64::ST1D_IMM:
3111 case AArch64::ST1H_D_IMM:
3112 case AArch64::ST1H_IMM:
3113 case AArch64::ST1H_S_IMM:
3114 case AArch64::ST1W_D_IMM:
3115 case AArch64::ST1W_IMM:
3116 case AArch64::ST2B_IMM:
3117 case AArch64::ST2D_IMM:
3118 case AArch64::ST2H_IMM:
3119 case AArch64::ST2W_IMM:
3120 case AArch64::ST3B_IMM:
3121 case AArch64::ST3D_IMM:
3122 case AArch64::ST3H_IMM:
3123 case AArch64::ST3W_IMM:
3124 case AArch64::ST4B_IMM:
3125 case AArch64::ST4D_IMM:
3126 case AArch64::ST4H_IMM:
3127 case AArch64::ST4W_IMM:
3128 case AArch64::STGPi:
3129 case AArch64::STGPreIndex:
3130 case AArch64::STZGPreIndex:
3131 case AArch64::ST2GPreIndex:
3132 case AArch64::STZ2GPreIndex:
3133 case AArch64::STGPostIndex:
3134 case AArch64::STZGPostIndex:
3135 case AArch64::ST2GPostIndex:
3136 case AArch64::STZ2GPostIndex:
3137 case AArch64::STNPDi:
3138 case AArch64::STNPQi:
3139 case AArch64::STNPSi:
3140 case AArch64::STNPWi:
3141 case AArch64::STNPXi:
3142 case AArch64::STNT1B_ZRI:
3143 case AArch64::STNT1D_ZRI:
3144 case AArch64::STNT1H_ZRI:
3145 case AArch64::STNT1W_ZRI:
3146 case AArch64::STPDi:
3147 case AArch64::STPQi:
3148 case AArch64::STPSi:
3149 case AArch64::STPWi:
3150 case AArch64::STPXi:
3151 case AArch64::STRBBpost:
3152 case AArch64::STRBBpre:
3153 case AArch64::STRBpost:
3154 case AArch64::STRBpre:
3155 case AArch64::STRDpost:
3156 case AArch64::STRDpre:
3157 case AArch64::STRHHpost:
3158 case AArch64::STRHHpre:
3159 case AArch64::STRHpost:
3160 case AArch64::STRHpre:
3161 case AArch64::STRQpost:
3162 case AArch64::STRQpre:
3163 case AArch64::STRSpost:
3164 case AArch64::STRSpre:
3165 case AArch64::STRWpost:
3166 case AArch64::STRWpre:
3167 case AArch64::STRXpost:
3168 case AArch64::STRXpre:
3169 case AArch64::LD1B_2Z_IMM:
3170 case AArch64::LD1B_2Z_STRIDED_IMM:
3171 case AArch64::LD1H_2Z_IMM:
3172 case AArch64::LD1H_2Z_STRIDED_IMM:
3173 case AArch64::LD1W_2Z_IMM:
3174 case AArch64::LD1W_2Z_STRIDED_IMM:
3175 case AArch64::LD1D_2Z_IMM:
3176 case AArch64::LD1D_2Z_STRIDED_IMM:
3177 case AArch64::LD1B_4Z_IMM:
3178 case AArch64::LD1B_4Z_STRIDED_IMM:
3179 case AArch64::LD1H_4Z_IMM:
3180 case AArch64::LD1H_4Z_STRIDED_IMM:
3181 case AArch64::LD1W_4Z_IMM:
3182 case AArch64::LD1W_4Z_STRIDED_IMM:
3183 case AArch64::LD1D_4Z_IMM:
3184 case AArch64::LD1D_4Z_STRIDED_IMM:
3185 case AArch64::LD1B_2Z_IMM_PSEUDO:
3186 case AArch64::LD1H_2Z_IMM_PSEUDO:
3187 case AArch64::LD1W_2Z_IMM_PSEUDO:
3188 case AArch64::LD1D_2Z_IMM_PSEUDO:
3189 case AArch64::LD1B_4Z_IMM_PSEUDO:
3190 case AArch64::LD1H_4Z_IMM_PSEUDO:
3191 case AArch64::LD1W_4Z_IMM_PSEUDO:
3192 case AArch64::LD1D_4Z_IMM_PSEUDO:
3193 return 3;
3194 case AArch64::LDPDpost:
3195 case AArch64::LDPDpre:
3196 case AArch64::LDPQpost:
3197 case AArch64::LDPQpre:
3198 case AArch64::LDPSpost:
3199 case AArch64::LDPSpre:
3200 case AArch64::LDPWpost:
3201 case AArch64::LDPWpre:
3202 case AArch64::LDPXpost:
3203 case AArch64::LDPXpre:
3204 case AArch64::STGPpre:
3205 case AArch64::STGPpost:
3206 case AArch64::STPDpost:
3207 case AArch64::STPDpre:
3208 case AArch64::STPQpost:
3209 case AArch64::STPQpre:
3210 case AArch64::STPSpost:
3211 case AArch64::STPSpre:
3212 case AArch64::STPWpost:
3213 case AArch64::STPWpre:
3214 case AArch64::STPXpost:
3215 case AArch64::STPXpre:
3216 return 4;
3217 }
3218}
3219
// NOTE(review): the signature line of this function (inner line 3220) is
// missing from this extract. Presumably this is a bool predicate over a
// MachineInstr named MI -- confirm against the upstream file.
//
// The visible body answers true only for the opcodes listed below and false
// for every other opcode.
3221 switch (MI.getOpcode()) {
3222 default:
3223 return false;
3224 // Scaled instructions.
3225 case AArch64::STRSui:
3226 case AArch64::STRDui:
3227 case AArch64::STRQui:
3228 case AArch64::STRXui:
3229 case AArch64::STRWui:
3230 case AArch64::LDRSui:
3231 case AArch64::LDRDui:
3232 case AArch64::LDRQui:
3233 case AArch64::LDRXui:
3234 case AArch64::LDRWui:
3235 case AArch64::LDRSWui:
3236 // Unscaled instructions, each followed by its pre-indexed counterpart.
3237 case AArch64::STURSi:
3238 case AArch64::STRSpre:
3239 case AArch64::STURDi:
3240 case AArch64::STRDpre:
3241 case AArch64::STURQi:
3242 case AArch64::STRQpre:
3243 case AArch64::STURWi:
3244 case AArch64::STRWpre:
3245 case AArch64::STURXi:
3246 case AArch64::STRXpre:
3247 case AArch64::LDURSi:
3248 case AArch64::LDRSpre:
3249 case AArch64::LDURDi:
3250 case AArch64::LDRDpre:
3251 case AArch64::LDURQi:
3252 case AArch64::LDRQpre:
3253 case AArch64::LDURWi:
3254 case AArch64::LDRWpre:
3255 case AArch64::LDURXi:
3256 case AArch64::LDRXpre:
3257 case AArch64::LDURSWi:
3258 case AArch64::LDRSWpre:
3259 // SVE instructions.
3260 case AArch64::LDR_ZXI:
3261 case AArch64::STR_ZXI:
3262 return true;
3263 }
3264}
3265
// NOTE(review): the signature line of this function (inner line 3266) is
// missing from this extract. Presumably this is a bool predicate over a
// MachineInstr named MI -- confirm against the upstream file.
//
// The visible body answers true for the TCRETURN* and AUTH_TCRETURN* opcodes
// listed below and false for everything else.
3267 switch (MI.getOpcode()) {
3268 default:
// An instruction that is not in the list must not be both a call and a
// return; the assert is there to catch an opcode that was left out.
3269 assert((!MI.isCall() || !MI.isReturn()) &&
3270 "Unexpected instruction - was a new tail call opcode introduced?");
3271 return false;
3272 case AArch64::TCRETURNdi:
3273 case AArch64::TCRETURNri:
3274 case AArch64::TCRETURNrix16x17:
3275 case AArch64::TCRETURNrix17:
3276 case AArch64::TCRETURNrinotx16:
3277 case AArch64::TCRETURNriALL:
3278 case AArch64::AUTH_TCRETURN:
3279 case AArch64::AUTH_TCRETURN_BTI:
3280 return true;
3281 }
3282}
3283
// NOTE(review): the signature line of this function (inner line 3284) is
// missing from this extract; the body switches on a value named Opc.
//
// Maps an opcode to the opcode named with an added "S" (for example
// ADDWri -> ADDSWri, AND_PPzPP -> ANDS_PPzPP). Any opcode without an entry
// below reaches llvm_unreachable.
3285 switch (Opc) {
3286 default:
3287 llvm_unreachable("Opcode has no flag setting equivalent!");
3288 // 32-bit cases:
3289 case AArch64::ADDWri:
3290 return AArch64::ADDSWri;
3291 case AArch64::ADDWrr:
3292 return AArch64::ADDSWrr;
3293 case AArch64::ADDWrs:
3294 return AArch64::ADDSWrs;
3295 case AArch64::ADDWrx:
3296 return AArch64::ADDSWrx;
3297 case AArch64::ANDWri:
3298 return AArch64::ANDSWri;
3299 case AArch64::ANDWrr:
3300 return AArch64::ANDSWrr;
3301 case AArch64::ANDWrs:
3302 return AArch64::ANDSWrs;
3303 case AArch64::BICWrr:
3304 return AArch64::BICSWrr;
3305 case AArch64::BICWrs:
3306 return AArch64::BICSWrs;
3307 case AArch64::SUBWri:
3308 return AArch64::SUBSWri;
3309 case AArch64::SUBWrr:
3310 return AArch64::SUBSWrr;
3311 case AArch64::SUBWrs:
3312 return AArch64::SUBSWrs;
3313 case AArch64::SUBWrx:
3314 return AArch64::SUBSWrx;
3315 // 64-bit cases:
3316 case AArch64::ADDXri:
3317 return AArch64::ADDSXri;
3318 case AArch64::ADDXrr:
3319 return AArch64::ADDSXrr;
3320 case AArch64::ADDXrs:
3321 return AArch64::ADDSXrs;
3322 case AArch64::ADDXrx:
3323 return AArch64::ADDSXrx;
3324 case AArch64::ANDXri:
3325 return AArch64::ANDSXri;
3326 case AArch64::ANDXrr:
3327 return AArch64::ANDSXrr;
3328 case AArch64::ANDXrs:
3329 return AArch64::ANDSXrs;
3330 case AArch64::BICXrr:
3331 return AArch64::BICSXrr;
3332 case AArch64::BICXrs:
3333 return AArch64::BICSXrs;
3334 case AArch64::SUBXri:
3335 return AArch64::SUBSXri;
3336 case AArch64::SUBXrr:
3337 return AArch64::SUBSXrr;
3338 case AArch64::SUBXrs:
3339 return AArch64::SUBSXrs;
3340 case AArch64::SUBXrx:
3341 return AArch64::SUBSXrx;
3342 // SVE instructions:
3343 case AArch64::AND_PPzPP:
3344 return AArch64::ANDS_PPzPP;
3345 case AArch64::BIC_PPzPP:
3346 return AArch64::BICS_PPzPP;
3347 case AArch64::EOR_PPzPP:
3348 return AArch64::EORS_PPzPP;
3349 case AArch64::NAND_PPzPP:
3350 return AArch64::NANDS_PPzPP;
3351 case AArch64::NOR_PPzPP:
3352 return AArch64::NORS_PPzPP;
3353 case AArch64::ORN_PPzPP:
3354 return AArch64::ORNS_PPzPP;
3355 case AArch64::ORR_PPzPP:
3356 return AArch64::ORRS_PPzPP;
3357 case AArch64::BRKA_PPzP:
3358 return AArch64::BRKAS_PPzP;
3359 case AArch64::BRKPA_PPzPP:
3360 return AArch64::BRKPAS_PPzPP;
3361 case AArch64::BRKB_PPzP:
3362 return AArch64::BRKBS_PPzP;
3363 case AArch64::BRKPB_PPzPP:
3364 return AArch64::BRKPBS_PPzPP;
3365 case AArch64::BRKN_PPzP:
3366 return AArch64::BRKNS_PPzP;
3367 case AArch64::RDFFR_PPz:
3368 return AArch64::RDFFRS_PPz;
3369 case AArch64::PTRUE_B:
3370 return AArch64::PTRUES_B;
3371 }
3372}
3373
3374// Is this a candidate for ld/st merging or pairing? For example, we don't
3375// touch volatiles or load/stores that have a hint to avoid pair formation.
3377
// NOTE(review): the signature line of this function (inner line 3376) is
// missing from this extract, as are inner lines 3409, 3428 and 3439 further
// down. The gaps are marked where they occur.
//
// The visible body is a chain of rejections: each check returns false, and
// only an instruction that survives all of them yields true.
3378 bool IsPreLdSt = isPreLdSt(MI);
3379
3380 // If this is a volatile load/store, don't mess with it.
3381 if (MI.hasOrderedMemoryRef())
3382 return false;
3383
3384 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3385 // For Pre-inc LD/ST, the operand is shifted by one.
3386 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3387 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3388 "Expected a reg or frame index operand.");
3389
3390 // For Pre-indexed addressing quadword instructions, the third operand is the
3391 // immediate value.
3392 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3393
// Reject when neither operand 2 nor (for pre-indexed forms) operand 3 is an
// immediate.
3394 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3395 return false;
3396
3397 // Can't merge/pair if the instruction modifies the base register.
3398 // e.g., ldr x0, [x0]
3399 // This case will never occur with an FI base.
3400 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3401 // STR<S,D,Q,W,X>pre, it can be merged.
3402 // For example:
3403 // ldr q0, [x11, #32]!
3404 // ldr q1, [x11, #16]
3405 // to
3406 // ldp q0, q1, [x11, #32]!
3407 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3408 Register BaseReg = MI.getOperand(1).getReg();
// NOTE(review): inner line 3409 is missing here; TRI is used below without a
// visible declaration.
3410 if (MI.modifiesRegister(BaseReg, TRI))
3411 return false;
3412 }
3413
3414 // Pairing SVE fills/spills is only valid for little-endian targets that
3415 // implement VLS 128.
3416 switch (MI.getOpcode()) {
3417 default:
3418 break;
3419 case AArch64::LDR_ZXI:
3420 case AArch64::STR_ZXI:
3421 if (!Subtarget.isLittleEndian() ||
3422 Subtarget.getSVEVectorSizeInBits() != 128)
3423 return false;
3424 }
3425
3426 // Check if this load/store has a hint to avoid pair formation.
3427 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
// NOTE(review): inner line 3428 (the condition guarding the return below) is
// missing from this extract.
3429 return false;
3430
3431 // Do not pair any callee-save store/reload instructions in the
3432 // prologue/epilogue if the CFI information encoded the operations as separate
3433 // instructions, as that will cause the size of the actual prologue to mismatch
3434 // with the prologue size recorded in the Windows CFI.
3435 const MCAsmInfo &MAI = MI.getMF()->getTarget().getMCAsmInfo();
3436 bool NeedsWinCFI =
3437 MAI.usesWindowsCFI() && MI.getMF()->getFunction().needsUnwindTableEntry();
3438 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
// NOTE(review): inner line 3439 (the rest of this condition) is missing from
// this extract.
3440 return false;
3441
3442 // On some CPUs quad load/store pairs are slower than two single load/stores.
3443 if (Subtarget.isPaired128Slow()) {
3444 switch (MI.getOpcode()) {
3445 default:
3446 break;
3447 case AArch64::LDURQi:
3448 case AArch64::STURQi:
3449 case AArch64::LDRQui:
3450 case AArch64::STRQui:
3451 return false;
3452 }
3453 }
3454
3455 return true;
3456}
3457
// NOTE(review): the first lines of this signature (inner lines 3458-3459) are
// missing from this extract, so the function name and the declarations of
// LdSt and BaseOps are not visible.
//
// Visible behaviour: returns false when LdSt neither loads nor stores, or when
// getMemOperandWithOffsetWidth fails. Otherwise it sets Width, appends the
// single base operand to BaseOps and returns true; Offset and OffsetIsScalable
// are passed through to getMemOperandWithOffsetWidth.
3460 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3461 const TargetRegisterInfo *TRI) const {
3462 if (!LdSt.mayLoadOrStore())
3463 return false;
3464
3465 const MachineOperand *BaseOp;
3466 TypeSize WidthN(0, false);
3467 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3468 WidthN, TRI))
3469 return false;
3470 // The maximum vscale is 16 under AArch64, return the maximal extent for the
3471 // vector.
// NOTE(review): the comment above mentions a maximal extent, but the statement
// below records LocationSize::precise(WidthN) -- confirm which is intended.
3472 Width = LocationSize::precise(WidthN);
3473 BaseOps.push_back(BaseOp);
3474 return true;
3475}
3476
// NOTE(review): the declarator line of this function (inner line 3478) is
// missing from this extract, so the function name and the declaration of MemI
// are not visible.
//
// Visible behaviour: returns std::nullopt when getMemOperandWithOffset fails
// or when the base operand is not a register. Otherwise returns an ExtAddrMode
// holding that base register and the offset as displacement, with no scaled
// register (ScaledReg and Scale are both 0).
3477std::optional<ExtAddrMode>
3479 const TargetRegisterInfo *TRI) const {
3480 const MachineOperand *Base; // Filled with the base operand of MI.
3481 int64_t Offset; // Filled with the offset of MI.
// NOTE(review): OffsetIsScalable is filled in by the call below but is not
// examined anywhere in the visible body.
3482 bool OffsetIsScalable;
3483 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3484 return std::nullopt;
3485
3486 if (!Base->isReg())
3487 return std::nullopt;
3488 ExtAddrMode AM;
3489 AM.BaseReg = Base->getReg();
3490 AM.Displacement = Offset;
3491 AM.ScaledReg = 0;
3492 AM.Scale = 0;
3493 return AM;
3494}
3495
// NOTE(review): the first signature line of this function (inner line 3496)
// is missing from this extract, so the function name and the declaration of
// MemI are not visible. Inner lines 3643, 3673, 3717, 3723, 3775, 3812 and
// 3818-3819 are missing as well; the gaps are marked where they occur.
//
// Visible behaviour: decides whether the address computation AddrI can be
// folded into the addressing mode of the memory instruction MemI. Presumably
// Reg is the register AddrI defines and MemI uses -- confirm against the
// caller. On success the resulting mode is written to AM and true is returned;
// every rejection returns false.
3497 Register Reg,
3498 const MachineInstr &AddrI,
3499 ExtAddrMode &AM) const {
3500 // Filter out instructions into which we cannot fold.
// NumBytes is the access size. OffsetScale is the multiplier later applied to
// the immediate operand: it stays 1 for the LDUR/STUR and byte-sized opcodes
// and equals NumBytes for the wider "ui"/"roX" opcodes.
3501 unsigned NumBytes;
3502 int64_t OffsetScale = 1;
3503 switch (MemI.getOpcode()) {
3504 default:
3505 return false;
3506
3507 case AArch64::LDURQi:
3508 case AArch64::STURQi:
3509 NumBytes = 16;
3510 break;
3511
3512 case AArch64::LDURDi:
3513 case AArch64::STURDi:
3514 case AArch64::LDURXi:
3515 case AArch64::STURXi:
3516 NumBytes = 8;
3517 break;
3518
3519 case AArch64::LDURWi:
3520 case AArch64::LDURSWi:
3521 case AArch64::STURWi:
3522 NumBytes = 4;
3523 break;
3524
3525 case AArch64::LDURHi:
3526 case AArch64::STURHi:
3527 case AArch64::LDURHHi:
3528 case AArch64::STURHHi:
3529 case AArch64::LDURSHXi:
3530 case AArch64::LDURSHWi:
3531 NumBytes = 2;
3532 break;
3533
3534 case AArch64::LDRBroX:
3535 case AArch64::LDRBBroX:
3536 case AArch64::LDRSBXroX:
3537 case AArch64::LDRSBWroX:
3538 case AArch64::STRBroX:
3539 case AArch64::STRBBroX:
3540 case AArch64::LDURBi:
3541 case AArch64::LDURBBi:
3542 case AArch64::LDURSBXi:
3543 case AArch64::LDURSBWi:
3544 case AArch64::STURBi:
3545 case AArch64::STURBBi:
3546 case AArch64::LDRBui:
3547 case AArch64::LDRBBui:
3548 case AArch64::LDRSBXui:
3549 case AArch64::LDRSBWui:
3550 case AArch64::STRBui:
3551 case AArch64::STRBBui:
3552 NumBytes = 1;
3553 break;
3554
3555 case AArch64::LDRQroX:
3556 case AArch64::STRQroX:
3557 case AArch64::LDRQui:
3558 case AArch64::STRQui:
3559 NumBytes = 16;
3560 OffsetScale = 16;
3561 break;
3562
3563 case AArch64::LDRDroX:
3564 case AArch64::STRDroX:
3565 case AArch64::LDRXroX:
3566 case AArch64::STRXroX:
3567 case AArch64::LDRDui:
3568 case AArch64::STRDui:
3569 case AArch64::LDRXui:
3570 case AArch64::STRXui:
3571 NumBytes = 8;
3572 OffsetScale = 8;
3573 break;
3574
3575 case AArch64::LDRWroX:
3576 case AArch64::LDRSWroX:
3577 case AArch64::STRWroX:
3578 case AArch64::LDRWui:
3579 case AArch64::LDRSWui:
3580 case AArch64::STRWui:
3581 NumBytes = 4;
3582 OffsetScale = 4;
3583 break;
3584
3585 case AArch64::LDRHroX:
3586 case AArch64::STRHroX:
3587 case AArch64::LDRHHroX:
3588 case AArch64::STRHHroX:
3589 case AArch64::LDRSHXroX:
3590 case AArch64::LDRSHWroX:
3591 case AArch64::LDRHui:
3592 case AArch64::STRHui:
3593 case AArch64::LDRHHui:
3594 case AArch64::STRHHui:
3595 case AArch64::LDRSHXui:
3596 case AArch64::LDRSHWui:
3597 NumBytes = 2;
3598 OffsetScale = 2;
3599 break;
3600 }
3601
3602 // Check the fold operand is not the loaded/stored value.
// NOTE(review): despite its name, BaseRegOp is operand 0, which the comment
// above treats as the loaded/stored value rather than the base address.
3603 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3604 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3605 return false;
3606
3607 // Handle memory instructions with a [Reg, Reg] addressing mode.
3608 if (MemI.getOperand(2).isReg()) {
3609 // Bail if the addressing mode already includes extension of the offset
3610 // register.
3611 if (MemI.getOperand(3).getImm())
3612 return false;
3613
3614 // Check if we actually have a scaled offset.
3615 if (MemI.getOperand(4).getImm() == 0)
3616 OffsetScale = 1;
3617
3618 // If the address instructions is folded into the base register, then the
3619 // addressing mode must not have a scale. Then we can swap the base and the
3620 // scaled registers.
3621 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3622 return false;
3623
// Every path through this switch returns, so the [Reg, #Imm] handling further
// down is reached only when operand 2 is not a register.
3624 switch (AddrI.getOpcode()) {
3625 default:
3626 return false;
3627
3628 case AArch64::SBFMXri:
3629 // sxtw Xa, Wm
3630 // ldr Xd, [Xn, Xa, lsl #N]
3631 // ->
3632 // ldr Xd, [Xn, Wm, sxtw #N]
// Only the SBFM with immediates 0 and 31 (the sxtw shown above) is accepted.
3633 if (AddrI.getOperand(2).getImm() != 0 ||
3634 AddrI.getOperand(3).getImm() != 31)
3635 return false;
3636
// If Reg is currently the base register, the other register operand becomes
// the base and the extended register takes the scaled slot.
3637 AM.BaseReg = MemI.getOperand(1).getReg();
3638 if (AM.BaseReg == Reg)
3639 AM.BaseReg = MemI.getOperand(2).getReg();
3640 AM.ScaledReg = AddrI.getOperand(1).getReg();
3641 AM.Scale = OffsetScale;
3642 AM.Displacement = 0;
// NOTE(review): inner line 3643 is missing from this extract.
3644 return true;
3645
3646 case TargetOpcode::SUBREG_TO_REG: {
3647 // mov Wa, Wm
3648 // ldr Xd, [Xn, Xa, lsl #N]
3649 // ->
3650 // ldr Xd, [Xn, Wm, uxtw #N]
3651
3652 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3653 if (AddrI.getOperand(2).getImm() != AArch64::sub_32)
3654 return false;
3655
// The source must be a virtual register with exactly one non-debug use.
3656 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3657 Register OffsetReg = AddrI.getOperand(1).getReg();
3658 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3659 return false;
3660
// Its definition must be an ORRWrs whose first source is WZR and whose
// operand 3 immediate is 0.
3661 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3662 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3663 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3664 DefMI.getOperand(3).getImm() != 0)
3665 return false;
3666
3667 AM.BaseReg = MemI.getOperand(1).getReg();
3668 if (AM.BaseReg == Reg)
3669 AM.BaseReg = MemI.getOperand(2).getReg();
3670 AM.ScaledReg = DefMI.getOperand(2).getReg();
3671 AM.Scale = OffsetScale;
3672 AM.Displacement = 0;
// NOTE(review): inner line 3673 is missing from this extract.
3674 return true;
3675 }
3676 }
3677 }
3678
3679 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3680
3681 // Check we are not breaking a potential conversion to an LDP.
// For 4-, 8- and 16-byte accesses the fold is accepted when the old offset was
// already outside [MinOffset, MaxOffset] or the new offset is inside it. Other
// access sizes are always accepted.
3682 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3683 int64_t NewOffset) -> bool {
3684 int64_t MinOffset, MaxOffset;
3685 switch (NumBytes) {
3686 default:
3687 return true;
3688 case 4:
3689 MinOffset = -256;
3690 MaxOffset = 252;
3691 break;
3692 case 8:
3693 MinOffset = -512;
3694 MaxOffset = 504;
3695 break;
3696 case 16:
3697 MinOffset = -1024;
3698 MaxOffset = 1008;
3699 break;
3700 }
3701 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3702 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3703 };
// Folds an add/sub of an immediate: the new displacement is the existing
// offset (immediate operand times OffsetScale) plus Disp.
3704 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3705 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3706 int64_t NewOffset = OldOffset + Disp;
3707 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3708 return false;
3709 // If the old offset would fit into an LDP, but the new offset wouldn't,
3710 // bail out.
3711 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3712 return false;
3713 AM.BaseReg = AddrI.getOperand(1).getReg();
3714 AM.ScaledReg = 0;
3715 AM.Scale = 0;
3716 AM.Displacement = NewOffset;
// NOTE(review): inner line 3717 is missing from this extract.
3718 return true;
3719 };
3720
// Folds an add of a (possibly shifted or extended) register. This requires the
// memory instruction's immediate offset to be zero and a scale that fits in an
// unsigned value.
3721 auto canFoldAddRegIntoAddrMode =
3722 [&](int64_t Scale,
// NOTE(review): inner line 3723 (the rest of the lambda's parameter list) is
// missing from this extract; Form, used below, is presumably declared there.
3724 if (MemI.getOperand(2).getImm() != 0)
3725 return false;
3726 if ((unsigned)Scale != Scale)
3727 return false;
3728 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3729 return false;
3730 AM.BaseReg = AddrI.getOperand(1).getReg();
3731 AM.ScaledReg = AddrI.getOperand(2).getReg();
3732 AM.Scale = Scale;
3733 AM.Displacement = 0;
3734 AM.Form = Form;
3735 return true;
3736 };
3737
// True for STURQi/STRQui when the subtarget reports isSTRQroSlow().
3738 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3739 unsigned Opcode = MemI.getOpcode();
3740 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3741 Subtarget.isSTRQroSlow();
3742 };
3743
3744 int64_t Disp = 0;
3745 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3746 switch (AddrI.getOpcode()) {
3747 default:
3748 return false;
3749
3750 case AArch64::ADDXri:
3751 // add Xa, Xn, #N
3752 // ldr Xd, [Xa, #M]
3753 // ->
3754 // ldr Xd, [Xn, #N'+M]
// The displacement is operand 2 shifted left by operand 3.
3755 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3756 return canFoldAddSubImmIntoAddrMode(Disp);
3757
3758 case AArch64::SUBXri:
3759 // sub Xa, Xn, #N
3760 // ldr Xd, [Xa, #M]
3761 // ->
3762 // ldr Xd, [Xn, #N'+M]
// Same as ADDXri, except that the displacement is negated before folding.
3763 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3764 return canFoldAddSubImmIntoAddrMode(-Disp);
3765
3766 case AArch64::ADDXrs: {
3767 // add Xa, Xn, Xm, lsl #N
3768 // ldr Xd, [Xa]
3769 // ->
3770 // ldr Xd, [Xn, Xm, lsl #N]
3771
3772 // Don't fold the add if the result would be slower, unless optimising for
3773 // size.
3774 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
// NOTE(review): inner line 3775 (the condition guarding the return below) is
// missing from this extract.
3776 return false;
3777 Shift = AArch64_AM::getShiftValue(Shift);
3778 if (!OptSize) {
3779 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3780 return false;
3781 if (avoidSlowSTRQ(MemI))
3782 return false;
3783 }
3784 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3785 }
3786
3787 case AArch64::ADDXrr:
3788 // add Xa, Xn, Xm
3789 // ldr Xd, [Xa]
3790 // ->
3791 // ldr Xd, [Xn, Xm, lsl #0]
3792
3793 // Don't fold the add if the result would be slower, unless optimising for
3794 // size.
3795 if (!OptSize && avoidSlowSTRQ(MemI))
3796 return false;
3797 return canFoldAddRegIntoAddrMode(1);
3798
3799 case AArch64::ADDXrx:
3800 // add Xa, Xn, Wm, {s,u}xtw #N
3801 // ldr Xd, [Xa]
3802 // ->
3803 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3804
3805 // Don't fold the add if the result would be slower, unless optimising for
3806 // size.
3807 if (!OptSize && avoidSlowSTRQ(MemI))
3808 return false;
3809
3810 // Can fold only sign-/zero-extend of a word.
3811 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
// NOTE(review): inner line 3812 is missing from this extract; Extend, tested
// below, is presumably declared there.
3813 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3814 return false;
3815
3816 return canFoldAddRegIntoAddrMode(
3817 1ULL << AArch64_AM::getArithShiftValue(Imm),
// NOTE(review): inner lines 3818-3819 (the remaining call arguments) are
// missing from this extract.
3820 }
3821}
3822
3823// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3824// return the opcode of an instruction performing the same operation, but using
3825// the [Reg, Reg] addressing mode.
3826static unsigned regOffsetOpcode(unsigned Opcode) {
3827 switch (Opcode) {
3828 default:
3829 llvm_unreachable("Address folding not implemented for instruction");
3830
3831 case AArch64::LDURQi:
3832 case AArch64::LDRQui:
3833 return AArch64::LDRQroX;
3834 case AArch64::STURQi:
3835 case AArch64::STRQui:
3836 return AArch64::STRQroX;
3837 case AArch64::LDURDi:
3838 case AArch64::LDRDui:
3839 return AArch64::LDRDroX;
3840 case AArch64::STURDi:
3841 case AArch64::STRDui:
3842 return AArch64::STRDroX;
3843 case AArch64::LDURXi:
3844 case AArch64::LDRXui:
3845 return AArch64::LDRXroX;
3846 case AArch64::STURXi:
3847 case AArch64::STRXui:
3848 return AArch64::STRXroX;
3849 case AArch64::LDURWi:
3850 case AArch64::LDRWui:
3851 return AArch64::LDRWroX;
3852 case AArch64::LDURSWi:
3853 case AArch64::LDRSWui:
3854 return AArch64::LDRSWroX;
3855 case AArch64::STURWi:
3856 case AArch64::STRWui:
3857 return AArch64::STRWroX;
3858 case AArch64::LDURHi:
3859 case AArch64::LDRHui:
3860 return AArch64::LDRHroX;
3861 case AArch64::STURHi:
3862 case AArch64::STRHui:
3863 return AArch64::STRHroX;
3864 case AArch64::LDURHHi:
3865 case AArch64::LDRHHui:
3866 return AArch64::LDRHHroX;
3867 case AArch64::STURHHi:
3868 case AArch64::STRHHui:
3869 return AArch64::STRHHroX;
3870 case AArch64::LDURSHXi:
3871 case AArch64::LDRSHXui:
3872 return AArch64::LDRSHXroX;
3873 case AArch64::LDURSHWi:
3874 case AArch64::LDRSHWui:
3875 return AArch64::LDRSHWroX;
3876 case AArch64::LDURBi:
3877 case AArch64::LDRBui:
3878 return AArch64::LDRBroX;
3879 case AArch64::LDURBBi:
3880 case AArch64::LDRBBui:
3881 return AArch64::LDRBBroX;
3882 case AArch64::LDURSBXi:
3883 case AArch64::LDRSBXui:
3884 return AArch64::LDRSBXroX;
3885 case AArch64::LDURSBWi:
3886 case AArch64::LDRSBWui:
3887 return AArch64::LDRSBWroX;
3888 case AArch64::STURBi:
3889 case AArch64::STRBui:
3890 return AArch64::STRBroX;
3891 case AArch64::STURBBi:
3892 case AArch64::STRBBui:
3893 return AArch64::STRBBroX;
3894 }
3895}
3896
3897// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3898// the opcode of an instruction performing the same operation, but using the
3899// [Reg, #Imm] addressing mode with scaled offset.
3900unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3901 switch (Opcode) {
3902 default:
3903 llvm_unreachable("Address folding not implemented for instruction");
3904
3905 case AArch64::LDURQi:
3906 Scale = 16;
3907 return AArch64::LDRQui;
3908 case AArch64::STURQi:
3909 Scale = 16;
3910 return AArch64::STRQui;
3911 case AArch64::LDURDi:
3912 Scale = 8;
3913 return AArch64::LDRDui;
3914 case AArch64::STURDi:
3915 Scale = 8;
3916 return AArch64::STRDui;
3917 case AArch64::LDURXi:
3918 Scale = 8;
3919 return AArch64::LDRXui;
3920 case AArch64::STURXi:
3921 Scale = 8;
3922 return AArch64::STRXui;
3923 case AArch64::LDURWi:
3924 Scale = 4;
3925 return AArch64::LDRWui;
3926 case AArch64::LDURSWi:
3927 Scale = 4;
3928 return AArch64::LDRSWui;
3929 case AArch64::STURWi:
3930 Scale = 4;
3931 return AArch64::STRWui;
3932 case AArch64::LDURHi:
3933 Scale = 2;
3934 return AArch64::LDRHui;
3935 case AArch64::STURHi:
3936 Scale = 2;
3937 return AArch64::STRHui;
3938 case AArch64::LDURHHi:
3939 Scale = 2;
3940 return AArch64::LDRHHui;
3941 case AArch64::STURHHi:
3942 Scale = 2;
3943 return AArch64::STRHHui;
3944 case AArch64::LDURSHXi:
3945 Scale = 2;
3946 return AArch64::LDRSHXui;
3947 case AArch64::LDURSHWi:
3948 Scale = 2;
3949 return AArch64::LDRSHWui;
3950 case AArch64::LDURBi:
3951 Scale = 1;
3952 return AArch64::LDRBui;
3953 case AArch64::LDURBBi:
3954 Scale = 1;
3955 return AArch64::LDRBBui;
3956 case AArch64::LDURSBXi:
3957 Scale = 1;
3958 return AArch64::LDRSBXui;
3959 case AArch64::LDURSBWi:
3960 Scale = 1;
3961 return AArch64::LDRSBWui;
3962 case AArch64::STURBi:
3963 Scale = 1;
3964 return AArch64::STRBui;
3965 case AArch64::STURBBi:
3966 Scale = 1;
3967 return AArch64::STRBBui;
3968 case AArch64::LDRQui:
3969 case AArch64::STRQui:
3970 Scale = 16;
3971 return Opcode;
3972 case AArch64::LDRDui:
3973 case AArch64::STRDui:
3974 case AArch64::LDRXui:
3975 case AArch64::STRXui:
3976 Scale = 8;
3977 return Opcode;
3978 case AArch64::LDRWui:
3979 case AArch64::LDRSWui:
3980 case AArch64::STRWui:
3981 Scale = 4;
3982 return Opcode;
3983 case AArch64::LDRHui:
3984 case AArch64::STRHui:
3985 case AArch64::LDRHHui:
3986 case AArch64::STRHHui:
3987 case AArch64::LDRSHXui:
3988 case AArch64::LDRSHWui:
3989 Scale = 2;
3990 return Opcode;
3991 case AArch64::LDRBui:
3992 case AArch64::LDRBBui:
3993 case AArch64::LDRSBXui:
3994 case AArch64::LDRSBWui:
3995 case AArch64::STRBui:
3996 case AArch64::STRBBui:
3997 Scale = 1;
3998 return Opcode;
3999 }
4000}
4001
4002// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
4003// the opcode of an instruction performing the same operation, but using the
4004// [Reg, #Imm] addressing mode with unscaled offset.
4005unsigned unscaledOffsetOpcode(unsigned Opcode) {
4006 switch (Opcode) {
4007 default:
4008 llvm_unreachable("Address folding not implemented for instruction");
4009
4010 case AArch64::LDURQi:
4011 case AArch64::STURQi:
4012 case AArch64::LDURDi:
4013 case AArch64::STURDi:
4014 case AArch64::LDURXi:
4015 case AArch64::STURXi:
4016 case AArch64::LDURWi:
4017 case AArch64::LDURSWi:
4018 case AArch64::STURWi:
4019 case AArch64::LDURHi:
4020 case AArch64::STURHi:
4021 case AArch64::LDURHHi:
4022 case AArch64::STURHHi:
4023 case AArch64::LDURSHXi:
4024 case AArch64::LDURSHWi:
4025 case AArch64::LDURBi:
4026 case AArch64::STURBi:
4027 case AArch64::LDURBBi:
4028 case AArch64::STURBBi:
4029 case AArch64::LDURSBWi:
4030 case AArch64::LDURSBXi:
4031 return Opcode;
4032 case AArch64::LDRQui:
4033 return AArch64::LDURQi;
4034 case AArch64::STRQui:
4035 return AArch64::STURQi;
4036 case AArch64::LDRDui:
4037 return AArch64::LDURDi;
4038 case AArch64::STRDui:
4039 return AArch64::STURDi;
4040 case AArch64::LDRXui:
4041 return AArch64::LDURXi;
4042 case AArch64::STRXui:
4043 return AArch64::STURXi;
4044 case AArch64::LDRWui:
4045 return AArch64::LDURWi;
4046 case AArch64::LDRSWui:
4047 return AArch64::LDURSWi;
4048 case AArch64::STRWui:
4049 return AArch64::STURWi;
4050 case AArch64::LDRHui:
4051 return AArch64::LDURHi;
4052 case AArch64::STRHui:
4053 return AArch64::STURHi;
4054 case AArch64::LDRHHui:
4055 return AArch64::LDURHHi;
4056 case AArch64::STRHHui:
4057 return AArch64::STURHHi;
4058 case AArch64::LDRSHXui:
4059 return AArch64::LDURSHXi;
4060 case AArch64::LDRSHWui:
4061 return AArch64::LDURSHWi;
4062 case AArch64::LDRBBui:
4063 return AArch64::LDURBBi;
4064 case AArch64::LDRBui:
4065 return AArch64::LDURBi;
4066 case AArch64::STRBBui:
4067 return AArch64::STURBBi;
4068 case AArch64::STRBui:
4069 return AArch64::STURBi;
4070 case AArch64::LDRSBWui:
4071 return AArch64::LDURSBWi;
4072 case AArch64::LDRSBXui:
4073 return AArch64::LDURSBXi;
4074 }
4075}
4076
4077// Given the opcode of a memory load/store instruction, return the opcode of an
4078// instruction performing the same operation, but using
4079// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
4080// offset register.
4081static unsigned offsetExtendOpcode(unsigned Opcode) {
4082 switch (Opcode) {
4083 default:
4084 llvm_unreachable("Address folding not implemented for instruction");
4085
4086 case AArch64::LDRQroX:
4087 case AArch64::LDURQi:
4088 case AArch64::LDRQui:
4089 return AArch64::LDRQroW;
4090 case AArch64::STRQroX:
4091 case AArch64::STURQi:
4092 case AArch64::STRQui:
4093 return AArch64::STRQroW;
4094 case AArch64::LDRDroX:
4095 case AArch64::LDURDi:
4096 case AArch64::LDRDui:
4097 return AArch64::LDRDroW;
4098 case AArch64::STRDroX:
4099 case AArch64::STURDi:
4100 case AArch64::STRDui:
4101 return AArch64::STRDroW;
4102 case AArch64::LDRXroX:
4103 case AArch64::LDURXi:
4104 case AArch64::LDRXui:
4105 return AArch64::LDRXroW;
4106 case AArch64::STRXroX:
4107 case AArch64::STURXi:
4108 case AArch64::STRXui:
4109 return AArch64::STRXroW;
4110 case AArch64::LDRWroX:
4111 case AArch64::LDURWi:
4112 case AArch64::LDRWui:
4113 return AArch64::LDRWroW;
4114 case AArch64::LDRSWroX:
4115 case AArch64::LDURSWi:
4116 case AArch64::LDRSWui:
4117 return AArch64::LDRSWroW;
4118 case AArch64::STRWroX:
4119 case AArch64::STURWi:
4120 case AArch64::STRWui:
4121 return AArch64::STRWroW;
4122 case AArch64::LDRHroX:
4123 case AArch64::LDURHi:
4124 case AArch64::LDRHui:
4125 return AArch64::LDRHroW;
4126 case AArch64::STRHroX:
4127 case AArch64::STURHi:
4128 case AArch64::STRHui:
4129 return AArch64::STRHroW;
4130 case AArch64::LDRHHroX:
4131 case AArch64::LDURHHi:
4132 case AArch64::LDRHHui:
4133 return AArch64::LDRHHroW;
4134 case AArch64::STRHHroX:
4135 case AArch64::STURHHi:
4136 case AArch64::STRHHui:
4137 return AArch64::STRHHroW;
4138 case AArch64::LDRSHXroX:
4139 case AArch64::LDURSHXi:
4140 case AArch64::LDRSHXui:
4141 return AArch64::LDRSHXroW;
4142 case AArch64::LDRSHWroX:
4143 case AArch64::LDURSHWi:
4144 case AArch64::LDRSHWui:
4145 return AArch64::LDRSHWroW;
4146 case AArch64::LDRBroX:
4147 case AArch64::LDURBi:
4148 case AArch64::LDRBui:
4149 return AArch64::LDRBroW;
4150 case AArch64::LDRBBroX:
4151 case AArch64::LDURBBi:
4152 case AArch64::LDRBBui:
4153 return AArch64::LDRBBroW;
4154 case AArch64::LDRSBXroX:
4155 case AArch64::LDURSBXi:
4156 case AArch64::LDRSBXui:
4157 return AArch64::LDRSBXroW;
4158 case AArch64::LDRSBWroX:
4159 case AArch64::LDURSBWi:
4160 case AArch64::LDRSBWui:
4161 return AArch64::LDRSBWroW;
4162 case AArch64::STRBroX:
4163 case AArch64::STURBi:
4164 case AArch64::STRBui:
4165 return AArch64::STRBroW;
4166 case AArch64::STRBBroX:
4167 case AArch64::STURBBi:
4168 case AArch64::STRBBui:
4169 return AArch64::STRBBroW;
4170 }
4171}
4172
4174 const ExtAddrMode &AM) const {
 // Builds, immediately before MemI, a replacement load/store whose address is
 // assembled from the components of AM (base register, optional scaled
 // register, displacement), and returns the newly created instruction. MemI
 // itself is not erased by the visible code.
 // NOTE(review): the opening line of this definition and the conditions that
 // select between the branches below are absent from this listing; the
 // comments added here describe only the statements that are visible.
4175
4176 const DebugLoc &DL = MemI.getDebugLoc();
4177 MachineBasicBlock &MBB = *MemI.getParent();
4178 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
4179
4181 if (AM.ScaledReg) {
4182 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4183 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4184 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
 // Operand 0 of MemI is carried over as the data register: it is flagged as
 // a def when MemI is a load and added as a plain use otherwise.
 // The two trailing immediates are presumably the extend kind (0) and the
 // shift-enable flag (set when Scale > 1) — TODO confirm against the
 // register-offset instruction definitions.
4185 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4186 .addReg(MemI.getOperand(0).getReg(),
4187 getDefRegState(MemI.mayLoad()))
4188 .addReg(AM.BaseReg)
4189 .addReg(AM.ScaledReg)
4190 .addImm(0)
4191 .addImm(AM.Scale > 1)
4192 .setMemRefs(MemI.memoperands())
4193 .setMIFlags(MemI.getFlags());
4194 return B.getInstr();
4195 }
4196
4197 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4198 "Addressing mode not supported for folding");
4199
4200 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
 // A displacement that fits a signed 9-bit immediate uses the unscaled
 // opcode; anything else uses the scaled opcode. scaledOffsetOpcode is
 // presumably expected to update Scale (passed as an lvalue) so that the
 // division below yields the encoded immediate — TODO confirm.
4201 unsigned Scale = 1;
4202 unsigned Opcode = MemI.getOpcode();
4203 if (isInt<9>(AM.Displacement))
4204 Opcode = unscaledOffsetOpcode(Opcode);
4205 else
4206 Opcode = scaledOffsetOpcode(Opcode, Scale);
4207
4208 auto B =
4209 BuildMI(MBB, MemI, DL, get(Opcode))
4210 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4211 .addReg(AM.BaseReg)
4212 .addImm(AM.Displacement / Scale)
4213 .setMemRefs(MemI.memoperands())
4214 .setMIFlags(MemI.getFlags());
4215 return B.getInstr();
4216 }
4217
4220 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4221 assert(AM.ScaledReg && !AM.Displacement &&
4222 "Address offset can be a register or an immediate, but not both");
4223 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4224 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4225 // Make sure the offset register is in the correct register class.
 // When the offset lives in a 64-bit class, a fresh GPR32 virtual register is
 // created and filled with a COPY of the sub_32 subregister, so the memory
 // instruction below receives a 32-bit offset register.
4226 Register OffsetReg = AM.ScaledReg;
4227 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4228 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4229 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4230 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4231 .addReg(AM.ScaledReg, {}, AArch64::sub_32);
4232 }
 // NOTE(review): one operand line of this builder chain is not present in the
 // listing (between the offset register and the `Scale != 1` immediate).
4233 auto B =
4234 BuildMI(MBB, MemI, DL, get(Opcode))
4235 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4236 .addReg(AM.BaseReg)
4237 .addReg(OffsetReg)
4239 .addImm(AM.Scale != 1)
4240 .setMemRefs(MemI.memoperands())
4241 .setMIFlags(MemI.getFlags());
4242
4243 return B.getInstr();
4244 }
4245
4247 "Function must not be called with an addressing mode it can't handle");
4248}
4249
4250/// Return true if the opcode is a post-index ld/st instruction, which really
4251/// loads from base+0.
4252static bool isPostIndexLdStOpcode(unsigned Opcode) {
4253 switch (Opcode) {
4254 default:
4255 return false;
4256 case AArch64::LD1Fourv16b_POST:
4257 case AArch64::LD1Fourv1d_POST:
4258 case AArch64::LD1Fourv2d_POST:
4259 case AArch64::LD1Fourv2s_POST:
4260 case AArch64::LD1Fourv4h_POST:
4261 case AArch64::LD1Fourv4s_POST:
4262 case AArch64::LD1Fourv8b_POST:
4263 case AArch64::LD1Fourv8h_POST:
4264 case AArch64::LD1Onev16b_POST:
4265 case AArch64::LD1Onev1d_POST:
4266 case AArch64::LD1Onev2d_POST:
4267 case AArch64::LD1Onev2s_POST:
4268 case AArch64::LD1Onev4h_POST:
4269 case AArch64::LD1Onev4s_POST:
4270 case AArch64::LD1Onev8b_POST:
4271 case AArch64::LD1Onev8h_POST:
4272 case AArch64::LD1Rv16b_POST:
4273 case AArch64::LD1Rv1d_POST:
4274 case AArch64::LD1Rv2d_POST:
4275 case AArch64::LD1Rv2s_POST:
4276 case AArch64::LD1Rv4h_POST:
4277 case AArch64::LD1Rv4s_POST:
4278 case AArch64::LD1Rv8b_POST:
4279 case AArch64::LD1Rv8h_POST:
4280 case AArch64::LD1Threev16b_POST:
4281 case AArch64::LD1Threev1d_POST:
4282 case AArch64::LD1Threev2d_POST:
4283 case AArch64::LD1Threev2s_POST:
4284 case AArch64::LD1Threev4h_POST:
4285 case AArch64::LD1Threev4s_POST:
4286 case AArch64::LD1Threev8b_POST:
4287 case AArch64::LD1Threev8h_POST:
4288 case AArch64::LD1Twov16b_POST:
4289 case AArch64::LD1Twov1d_POST:
4290 case AArch64::LD1Twov2d_POST:
4291 case AArch64::LD1Twov2s_POST:
4292 case AArch64::LD1Twov4h_POST:
4293 case AArch64::LD1Twov4s_POST:
4294 case AArch64::LD1Twov8b_POST:
4295 case AArch64::LD1Twov8h_POST:
4296 case AArch64::LD1i16_POST:
4297 case AArch64::LD1i32_POST:
4298 case AArch64::LD1i64_POST:
4299 case AArch64::LD1i8_POST:
4300 case AArch64::LD2Rv16b_POST:
4301 case AArch64::LD2Rv1d_POST:
4302 case AArch64::LD2Rv2d_POST:
4303 case AArch64::LD2Rv2s_POST:
4304 case AArch64::LD2Rv4h_POST:
4305 case AArch64::LD2Rv4s_POST:
4306 case AArch64::LD2Rv8b_POST:
4307 case AArch64::LD2Rv8h_POST:
4308 case AArch64::LD2Twov16b_POST:
4309 case AArch64::LD2Twov2d_POST:
4310 case AArch64::LD2Twov2s_POST:
4311 case AArch64::LD2Twov4h_POST:
4312 case AArch64::LD2Twov4s_POST:
4313 case AArch64::LD2Twov8b_POST:
4314 case AArch64::LD2Twov8h_POST:
4315 case AArch64::LD2i16_POST:
4316 case AArch64::LD2i32_POST:
4317 case AArch64::LD2i64_POST:
4318 case AArch64::LD2i8_POST:
4319 case AArch64::LD3Rv16b_POST:
4320 case AArch64::LD3Rv1d_POST:
4321 case AArch64::LD3Rv2d_POST:
4322 case AArch64::LD3Rv2s_POST:
4323 case AArch64::LD3Rv4h_POST:
4324 case AArch64::LD3Rv4s_POST:
4325 case AArch64::LD3Rv8b_POST:
4326 case AArch64::LD3Rv8h_POST:
4327 case AArch64::LD3Threev16b_POST:
4328 case AArch64::LD3Threev2d_POST:
4329 case AArch64::LD3Threev2s_POST:
4330 case AArch64::LD3Threev4h_POST:
4331 case AArch64::LD3Threev4s_POST:
4332 case AArch64::LD3Threev8b_POST:
4333 case AArch64::LD3Threev8h_POST:
4334 case AArch64::LD3i16_POST:
4335 case AArch64::LD3i32_POST:
4336 case AArch64::LD3i64_POST:
4337 case AArch64::LD3i8_POST:
4338 case AArch64::LD4Fourv16b_POST:
4339 case AArch64::LD4Fourv2d_POST:
4340 case AArch64::LD4Fourv2s_POST:
4341 case AArch64::LD4Fourv4h_POST:
4342 case AArch64::LD4Fourv4s_POST:
4343 case AArch64::LD4Fourv8b_POST:
4344 case AArch64::LD4Fourv8h_POST:
4345 case AArch64::LD4Rv16b_POST:
4346 case AArch64::LD4Rv1d_POST:
4347 case AArch64::LD4Rv2d_POST:
4348 case AArch64::LD4Rv2s_POST:
4349 case AArch64::LD4Rv4h_POST:
4350 case AArch64::LD4Rv4s_POST:
4351 case AArch64::LD4Rv8b_POST:
4352 case AArch64::LD4Rv8h_POST:
4353 case AArch64::LD4i16_POST:
4354 case AArch64::LD4i32_POST:
4355 case AArch64::LD4i64_POST:
4356 case AArch64::LD4i8_POST:
4357 case AArch64::LDAPRWpost:
4358 case AArch64::LDAPRXpost:
4359 case AArch64::LDIAPPWpost:
4360 case AArch64::LDIAPPXpost:
4361 case AArch64::LDPDpost:
4362 case AArch64::LDPQpost:
4363 case AArch64::LDPSWpost:
4364 case AArch64::LDPSpost:
4365 case AArch64::LDPWpost:
4366 case AArch64::LDPXpost:
4367 case AArch64::LDRBBpost:
4368 case AArch64::LDRBpost:
4369 case AArch64::LDRDpost:
4370 case AArch64::LDRHHpost:
4371 case AArch64::LDRHpost:
4372 case AArch64::LDRQpost:
4373 case AArch64::LDRSBWpost:
4374 case AArch64::LDRSBXpost:
4375 case AArch64::LDRSHWpost:
4376 case AArch64::LDRSHXpost:
4377 case AArch64::LDRSWpost:
4378 case AArch64::LDRSpost:
4379 case AArch64::LDRWpost:
4380 case AArch64::LDRXpost:
4381 case AArch64::ST1Fourv16b_POST:
4382 case AArch64::ST1Fourv1d_POST:
4383 case AArch64::ST1Fourv2d_POST:
4384 case AArch64::ST1Fourv2s_POST:
4385 case AArch64::ST1Fourv4h_POST:
4386 case AArch64::ST1Fourv4s_POST:
4387 case AArch64::ST1Fourv8b_POST:
4388 case AArch64::ST1Fourv8h_POST:
4389 case AArch64::ST1Onev16b_POST:
4390 case AArch64::ST1Onev1d_POST:
4391 case AArch64::ST1Onev2d_POST:
4392 case AArch64::ST1Onev2s_POST:
4393 case AArch64::ST1Onev4h_POST:
4394 case AArch64::ST1Onev4s_POST:
4395 case AArch64::ST1Onev8b_POST:
4396 case AArch64::ST1Onev8h_POST:
4397 case AArch64::ST1Threev16b_POST:
4398 case AArch64::ST1Threev1d_POST:
4399 case AArch64::ST1Threev2d_POST:
4400 case AArch64::ST1Threev2s_POST:
4401 case AArch64::ST1Threev4h_POST:
4402 case AArch64::ST1Threev4s_POST:
4403 case AArch64::ST1Threev8b_POST:
4404 case AArch64::ST1Threev8h_POST:
4405 case AArch64::ST1Twov16b_POST:
4406 case AArch64::ST1Twov1d_POST:
4407 case AArch64::ST1Twov2d_POST:
4408 case AArch64::ST1Twov2s_POST:
4409 case AArch64::ST1Twov4h_POST:
4410 case AArch64::ST1Twov4s_POST:
4411 case AArch64::ST1Twov8b_POST:
4412 case AArch64::ST1Twov8h_POST:
4413 case AArch64::ST1i16_POST:
4414 case AArch64::ST1i32_POST:
4415 case AArch64::ST1i64_POST:
4416 case AArch64::ST1i8_POST:
4417 case AArch64::ST2GPostIndex:
4418 case AArch64::ST2Twov16b_POST:
4419 case AArch64::ST2Twov2d_POST:
4420 case AArch64::ST2Twov2s_POST:
4421 case AArch64::ST2Twov4h_POST:
4422 case AArch64::ST2Twov4s_POST:
4423 case AArch64::ST2Twov8b_POST:
4424 case AArch64::ST2Twov8h_POST:
4425 case AArch64::ST2i16_POST:
4426 case AArch64::ST2i32_POST:
4427 case AArch64::ST2i64_POST:
4428 case AArch64::ST2i8_POST:
4429 case AArch64::ST3Threev16b_POST:
4430 case AArch64::ST3Threev2d_POST:
4431 case AArch64::ST3Threev2s_POST:
4432 case AArch64::ST3Threev4h_POST:
4433 case AArch64::ST3Threev4s_POST:
4434 case AArch64::ST3Threev8b_POST:
4435 case AArch64::ST3Threev8h_POST:
4436 case AArch64::ST3i16_POST:
4437 case AArch64::ST3i32_POST:
4438 case AArch64::ST3i64_POST:
4439 case AArch64::ST3i8_POST:
4440 case AArch64::ST4Fourv16b_POST:
4441 case AArch64::ST4Fourv2d_POST:
4442 case AArch64::ST4Fourv2s_POST:
4443 case AArch64::ST4Fourv4h_POST:
4444 case AArch64::ST4Fourv4s_POST:
4445 case AArch64::ST4Fourv8b_POST:
4446 case AArch64::ST4Fourv8h_POST:
4447 case AArch64::ST4i16_POST:
4448 case AArch64::ST4i32_POST:
4449 case AArch64::ST4i64_POST:
4450 case AArch64::ST4i8_POST:
4451 case AArch64::STGPostIndex:
4452 case AArch64::STGPpost:
4453 case AArch64::STPDpost:
4454 case AArch64::STPQpost:
4455 case AArch64::STPSpost:
4456 case AArch64::STPWpost:
4457 case AArch64::STPXpost:
4458 case AArch64::STRBBpost:
4459 case AArch64::STRBpost:
4460 case AArch64::STRDpost:
4461 case AArch64::STRHHpost:
4462 case AArch64::STRHpost:
4463 case AArch64::STRQpost:
4464 case AArch64::STRSpost:
4465 case AArch64::STRWpost:
4466 case AArch64::STRXpost:
4467 case AArch64::STZ2GPostIndex:
4468 case AArch64::STZGPostIndex:
4469 return true;
4470 }
4471}
4472
4474 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4475 bool &OffsetIsScalable, TypeSize &Width,
4476 const TargetRegisterInfo *TRI) const {
 // Decomposes a base+immediate load/store into its base operand, offset and
 // access width.
 //   LdSt             - the memory instruction to inspect.
 //   BaseOp           - set to the base operand (register or frame index).
 //   Offset           - set to the immediate multiplied by the scale reported
 //                      by getMemOpInfo, or to 0 for post-index opcodes.
 //   OffsetIsScalable - set from Scale.isScalable().
 //   Width            - filled in by getMemOpInfo.
 //   TRI              - not used by the visible body.
 // Returns false when the operand layout is not one of the two handled
 // shapes or when getMemOpInfo rejects the opcode; otherwise returns whether
 // the chosen base operand is a register or a frame index.
 // NOTE(review): the first line of this definition is absent from the listing.
4477 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4478 // Handle only loads/stores with base register followed by immediate offset.
4479 if (LdSt.getNumExplicitOperands() == 3) {
4480 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4481 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4482 !LdSt.getOperand(2).isImm())
4483 return false;
4484 } else if (LdSt.getNumExplicitOperands() == 4) {
4485 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4486 if (!LdSt.getOperand(1).isReg() ||
4487 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4488 !LdSt.getOperand(3).isImm())
4489 return false;
4490 } else
4491 return false;
4492
4493 // Get the scaling factor for the instruction and set the width for the
4494 // instruction.
4495 TypeSize Scale(0U, false);
 // The offset range reported by getMemOpInfo is not needed here, so it is
 // received into throwaway variables.
4496 int64_t Dummy1, Dummy2;
4497
4498 // If this returns false, then it's an instruction we don't want to handle.
4499 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4500 return false;
4501
4502 // Compute the offset. Offset is calculated as the immediate operand
4503 // multiplied by the scaling factor. Unscaled instructions have scaling factor
4504 // set to 1. Postindex are a special case which have an offset of 0.
 // For post-index opcodes the base is taken from operand 2; operand 0 is
 // presumably the written-back base def — TODO confirm against the
 // instruction definitions.
4505 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4506 BaseOp = &LdSt.getOperand(2);
4507 Offset = 0;
4508 } else if (LdSt.getNumExplicitOperands() == 3) {
4509 BaseOp = &LdSt.getOperand(1);
4510 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4511 } else {
4512 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4513 BaseOp = &LdSt.getOperand(2);
4514 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4515 }
4516 OffsetIsScalable = Scale.isScalable();
4517
4518 return BaseOp->isReg() || BaseOp->isFI();
4519}
4520
 // Returns the last explicit operand of the memory instruction LdSt, which is
 // asserted to be the immediate offset.
 // NOTE(review): the signature lines of this definition are absent from the
 // listing.
4523 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4524 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4525 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4526 return OfsOp;
4527}
4528
4529bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4530 TypeSize &Width, int64_t &MinOffset,
4531 int64_t &MaxOffset) {
4532 switch (Opcode) {
4533 // Not a memory operation or something we want to handle.
4534 default:
4535 Scale = TypeSize::getFixed(0);
4536 Width = TypeSize::getFixed(0);
4537 MinOffset = MaxOffset = 0;
4538 return false;
4539 // LDR / STR
4540 case AArch64::LDRQui:
4541 case AArch64::STRQui:
4542 Scale = TypeSize::getFixed(16);
4543 Width = TypeSize::getFixed(16);
4544 MinOffset = 0;
4545 MaxOffset = 4095;
4546 break;
4547 case AArch64::LDRXui:
4548 case AArch64::LDRDui:
4549 case AArch64::STRXui:
4550 case AArch64::STRDui:
4551 case AArch64::PRFMui:
4552 Scale = TypeSize::getFixed(8);
4553 Width = TypeSize::getFixed(8);
4554 MinOffset = 0;
4555 MaxOffset = 4095;
4556 break;
4557 case AArch64::LDRWui:
4558 case AArch64::LDRSui:
4559 case AArch64::LDRSWui:
4560 case AArch64::STRWui:
4561 case AArch64::STRSui:
4562 Scale = TypeSize::getFixed(4);
4563 Width = TypeSize::getFixed(4);
4564 MinOffset = 0;
4565 MaxOffset = 4095;
4566 break;
4567 case AArch64::LDRHui:
4568 case AArch64::LDRHHui:
4569 case AArch64::LDRSHWui:
4570 case AArch64::LDRSHXui:
4571 case AArch64::STRHui:
4572 case AArch64::STRHHui:
4573 Scale = TypeSize::getFixed(2);
4574 Width = TypeSize::getFixed(2);
4575 MinOffset = 0;
4576 MaxOffset = 4095;
4577 break;
4578 case AArch64::LDRBui:
4579 case AArch64::LDRBBui:
4580 case AArch64::LDRSBWui:
4581 case AArch64::LDRSBXui:
4582 case AArch64::STRBui:
4583 case AArch64::STRBBui:
4584 Scale = TypeSize::getFixed(1);
4585 Width = TypeSize::getFixed(1);
4586 MinOffset = 0;
4587 MaxOffset = 4095;
4588 break;
4589 // post/pre inc
4590 case AArch64::STRQpre:
4591 case AArch64::LDRQpost:
4592 Scale = TypeSize::getFixed(1);
4593 Width = TypeSize::getFixed(16);
4594 MinOffset = -256;
4595 MaxOffset = 255;
4596 break;
4597 case AArch64::LDRDpost:
4598 case AArch64::LDRDpre:
4599 case AArch64::LDRXpost:
4600 case AArch64::LDRXpre:
4601 case AArch64::STRDpost:
4602 case AArch64::STRDpre:
4603 case AArch64::STRXpost:
4604 case AArch64::STRXpre:
4605 Scale = TypeSize::getFixed(1);
4606 Width = TypeSize::getFixed(8);
4607 MinOffset = -256;
4608 MaxOffset = 255;
4609 break;
4610 case AArch64::STRWpost:
4611 case AArch64::STRWpre:
4612 case AArch64::LDRWpost:
4613 case AArch64::LDRWpre:
4614 case AArch64::STRSpost:
4615 case AArch64::STRSpre:
4616 case AArch64::LDRSpost:
4617 case AArch64::LDRSpre:
4618 Scale = TypeSize::getFixed(1);
4619 Width = TypeSize::getFixed(4);
4620 MinOffset = -256;
4621 MaxOffset = 255;
4622 break;
4623 case AArch64::LDRHpost:
4624 case AArch64::LDRHpre:
4625 case AArch64::STRHpost:
4626 case AArch64::STRHpre:
4627 case AArch64::LDRHHpost:
4628 case AArch64::LDRHHpre:
4629 case AArch64::STRHHpost:
4630 case AArch64::STRHHpre:
4631 Scale = TypeSize::getFixed(1);
4632 Width = TypeSize::getFixed(2);
4633 MinOffset = -256;
4634 MaxOffset = 255;
4635 break;
4636 case AArch64::LDRBpost:
4637 case AArch64::LDRBpre:
4638 case AArch64::STRBpost:
4639 case AArch64::STRBpre:
4640 case AArch64::LDRBBpost:
4641 case AArch64::LDRBBpre:
4642 case AArch64::STRBBpost:
4643 case AArch64::STRBBpre:
4644 Scale = TypeSize::getFixed(1);
4645 Width = TypeSize::getFixed(1);
4646 MinOffset = -256;
4647 MaxOffset = 255;
4648 break;
4649 // Unscaled
4650 case AArch64::LDURQi:
4651 case AArch64::STURQi:
4652 Scale = TypeSize::getFixed(1);
4653 Width = TypeSize::getFixed(16);
4654 MinOffset = -256;
4655 MaxOffset = 255;
4656 break;
4657 case AArch64::LDURXi:
4658 case AArch64::LDURDi:
4659 case AArch64::LDAPURXi:
4660 case AArch64::STURXi:
4661 case AArch64::STURDi:
4662 case AArch64::STLURXi:
4663 case AArch64::PRFUMi:
4664 Scale = TypeSize::getFixed(1);
4665 Width = TypeSize::getFixed(8);
4666 MinOffset = -256;
4667 MaxOffset = 255;
4668 break;
4669 case AArch64::LDURWi:
4670 case AArch64::LDURSi:
4671 case AArch64::LDURSWi:
4672 case AArch64::LDAPURi:
4673 case AArch64::LDAPURSWi:
4674 case AArch64::STURWi:
4675 case AArch64::STURSi:
4676 case AArch64::STLURWi:
4677 Scale = TypeSize::getFixed(1);
4678 Width = TypeSize::getFixed(4);
4679 MinOffset = -256;
4680 MaxOffset = 255;
4681 break;
4682 case AArch64::LDURHi:
4683 case AArch64::LDURHHi:
4684 case AArch64::LDURSHXi:
4685 case AArch64::LDURSHWi:
4686 case AArch64::LDAPURHi:
4687 case AArch64::LDAPURSHWi:
4688 case AArch64::LDAPURSHXi:
4689 case AArch64::STURHi:
4690 case AArch64::STURHHi:
4691 case AArch64::STLURHi:
4692 Scale = TypeSize::getFixed(1);
4693 Width = TypeSize::getFixed(2);
4694 MinOffset = -256;
4695 MaxOffset = 255;
4696 break;
4697 case AArch64::LDURBi:
4698 case AArch64::LDURBBi:
4699 case AArch64::LDURSBXi:
4700 case AArch64::LDURSBWi:
4701 case AArch64::LDAPURBi:
4702 case AArch64::LDAPURSBWi:
4703 case AArch64::LDAPURSBXi:
4704 case AArch64::STURBi:
4705 case AArch64::STURBBi:
4706 case AArch64::STLURBi:
4707 Scale = TypeSize::getFixed(1);
4708 Width = TypeSize::getFixed(1);
4709 MinOffset = -256;
4710 MaxOffset = 255;
4711 break;
4712 // LDP / STP (including pre/post inc)
4713 case AArch64::LDPQi:
4714 case AArch64::LDNPQi:
4715 case AArch64::STPQi:
4716 case AArch64::STNPQi:
4717 case AArch64::LDPQpost:
4718 case AArch64::LDPQpre:
4719 case AArch64::STPQpost:
4720 case AArch64::STPQpre:
4721 Scale = TypeSize::getFixed(16);
4722 Width = TypeSize::getFixed(16 * 2);
4723 MinOffset = -64;
4724 MaxOffset = 63;
4725 break;
4726 case AArch64::LDPXi:
4727 case AArch64::LDPDi:
4728 case AArch64::LDNPXi:
4729 case AArch64::LDNPDi:
4730 case AArch64::STPXi:
4731 case AArch64::STPDi:
4732 case AArch64::STNPXi:
4733 case AArch64::STNPDi:
4734 case AArch64::LDPDpost:
4735 case AArch64::LDPDpre:
4736 case AArch64::LDPXpost:
4737 case AArch64::LDPXpre:
4738 case AArch64::STPDpost:
4739 case AArch64::STPDpre:
4740 case AArch64::STPXpost:
4741 case AArch64::STPXpre:
4742 Scale = TypeSize::getFixed(8);
4743 Width = TypeSize::getFixed(8 * 2);
4744 MinOffset = -64;
4745 MaxOffset = 63;
4746 break;
4747 case AArch64::LDPWi:
4748 case AArch64::LDPSi:
4749 case AArch64::LDNPWi:
4750 case AArch64::LDNPSi:
4751 case AArch64::STPWi:
4752 case AArch64::STPSi:
4753 case AArch64::STNPWi:
4754 case AArch64::STNPSi:
4755 case AArch64::LDPSpost:
4756 case AArch64::LDPSpre:
4757 case AArch64::LDPWpost:
4758 case AArch64::LDPWpre:
4759 case AArch64::STPSpost:
4760 case AArch64::STPSpre:
4761 case AArch64::STPWpost:
4762 case AArch64::STPWpre:
4763 Scale = TypeSize::getFixed(4);
4764 Width = TypeSize::getFixed(4 * 2);
4765 MinOffset = -64;
4766 MaxOffset = 63;
4767 break;
4768 case AArch64::StoreSwiftAsyncContext:
4769 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4770 Scale = TypeSize::getFixed(1);
4771 Width = TypeSize::getFixed(8);
4772 MinOffset = 0;
4773 MaxOffset = 4095;
4774 break;
4775 case AArch64::ADDG:
4776 Scale = TypeSize::getFixed(16);
4777 Width = TypeSize::getFixed(0);
4778 MinOffset = 0;
4779 MaxOffset = 63;
4780 break;
4781 case AArch64::TAGPstack:
4782 Scale = TypeSize::getFixed(16);
4783 Width = TypeSize::getFixed(0);
4784 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4785 // of 63 (not 64!).
4786 MinOffset = -63;
4787 MaxOffset = 63;
4788 break;
4789 case AArch64::LDG:
4790 case AArch64::STGi:
4791 case AArch64::STGPreIndex:
4792 case AArch64::STGPostIndex:
4793 case AArch64::STZGi:
4794 case AArch64::STZGPreIndex:
4795 case AArch64::STZGPostIndex:
4796 Scale = TypeSize::getFixed(16);
4797 Width = TypeSize::getFixed(16);
4798 MinOffset = -256;
4799 MaxOffset = 255;
4800 break;
4801 // SVE
4802 case AArch64::STR_ZZZZXI:
4803 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4804 case AArch64::LDR_ZZZZXI:
4805 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4806 Scale = TypeSize::getScalable(16);
4807 Width = TypeSize::getScalable(16 * 4);
4808 MinOffset = -256;
4809 MaxOffset = 252;
4810 break;
4811 case AArch64::STR_ZZZXI:
4812 case AArch64::LDR_ZZZXI:
4813 Scale = TypeSize::getScalable(16);
4814 Width = TypeSize::getScalable(16 * 3);
4815 MinOffset = -256;
4816 MaxOffset = 253;
4817 break;
4818 case AArch64::STR_ZZXI:
4819 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4820 case AArch64::LDR_ZZXI:
4821 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4822 Scale = TypeSize::getScalable(16);
4823 Width = TypeSize::getScalable(16 * 2);
4824 MinOffset = -256;
4825 MaxOffset = 254;
4826 break;
4827 case AArch64::LDR_PXI:
4828 case AArch64::STR_PXI:
4829 Scale = TypeSize::getScalable(2);
4830 Width = TypeSize::getScalable(2);
4831 MinOffset = -256;
4832 MaxOffset = 255;
4833 break;
4834 case AArch64::LDR_PPXI:
4835 case AArch64::STR_PPXI:
4836 Scale = TypeSize::getScalable(2);
4837 Width = TypeSize::getScalable(2 * 2);
4838 MinOffset = -256;
4839 MaxOffset = 254;
4840 break;
4841 case AArch64::LDR_ZXI:
4842 case AArch64::STR_ZXI:
4843 Scale = TypeSize::getScalable(16);
4844 Width = TypeSize::getScalable(16);
4845 MinOffset = -256;
4846 MaxOffset = 255;
4847 break;
4848 case AArch64::LD1B_IMM:
4849 case AArch64::LD1H_IMM:
4850 case AArch64::LD1W_IMM:
4851 case AArch64::LD1D_IMM:
4852 case AArch64::LDNT1B_ZRI:
4853 case AArch64::LDNT1H_ZRI:
4854 case AArch64::LDNT1W_ZRI:
4855 case AArch64::LDNT1D_ZRI:
4856 case AArch64::ST1B_IMM:
4857 case AArch64::ST1H_IMM:
4858 case AArch64::ST1W_IMM:
4859 case AArch64::ST1D_IMM:
4860 case AArch64::STNT1B_ZRI:
4861 case AArch64::STNT1H_ZRI:
4862 case AArch64::STNT1W_ZRI:
4863 case AArch64::STNT1D_ZRI:
4864 case AArch64::LDNF1B_IMM:
4865 case AArch64::LDNF1H_IMM:
4866 case AArch64::LDNF1W_IMM:
4867 case AArch64::LDNF1D_IMM:
4868 // A full vectors worth of data
4869 // Width = mbytes * elements
4870 Scale = TypeSize::getScalable(16);
4871 Width = TypeSize::getScalable(16);
4872 MinOffset = -8;
4873 MaxOffset = 7;
4874 break;
4875 case AArch64::LD2B_IMM:
4876 case AArch64::LD2H_IMM:
4877 case AArch64::LD2W_IMM:
4878 case AArch64::LD2D_IMM:
4879 case AArch64::ST2B_IMM:
4880 case AArch64::ST2H_IMM:
4881 case AArch64::ST2W_IMM:
4882 case AArch64::ST2D_IMM:
4883 case AArch64::LD1B_2Z_IMM:
4884 case AArch64::LD1B_2Z_STRIDED_IMM:
4885 case AArch64::LD1H_2Z_IMM:
4886 case AArch64::LD1H_2Z_STRIDED_IMM:
4887 case AArch64::LD1W_2Z_IMM:
4888 case AArch64::LD1W_2Z_STRIDED_IMM:
4889 case AArch64::LD1D_2Z_IMM:
4890 case AArch64::LD1D_2Z_STRIDED_IMM:
4891 case AArch64::LD1B_2Z_IMM_PSEUDO:
4892 case AArch64::LD1H_2Z_IMM_PSEUDO:
4893 case AArch64::LD1W_2Z_IMM_PSEUDO:
4894 case AArch64::LD1D_2Z_IMM_PSEUDO:
4895 Scale = TypeSize::getScalable(32);
4896 Width = TypeSize::getScalable(16 * 2);
4897 MinOffset = -8;
4898 MaxOffset = 7;
4899 break;
4900 case AArch64::LD3B_IMM:
4901 case AArch64::LD3H_IMM:
4902 case AArch64::LD3W_IMM:
4903 case AArch64::LD3D_IMM:
4904 case AArch64::ST3B_IMM:
4905 case AArch64::ST3H_IMM:
4906 case AArch64::ST3W_IMM:
4907 case AArch64::ST3D_IMM:
4908 Scale = TypeSize::getScalable(48);
4909 Width = TypeSize::getScalable(16 * 3);
4910 MinOffset = -8;
4911 MaxOffset = 7;
4912 break;
4913 case AArch64::LD4B_IMM:
4914 case AArch64::LD4H_IMM:
4915 case AArch64::LD4W_IMM:
4916 case AArch64::LD4D_IMM:
4917 case AArch64::ST4B_IMM:
4918 case AArch64::ST4H_IMM:
4919 case AArch64::ST4W_IMM:
4920 case AArch64::ST4D_IMM:
4921 case AArch64::LD1B_4Z_IMM:
4922 case AArch64::LD1B_4Z_STRIDED_IMM:
4923 case AArch64::LD1H_4Z_IMM:
4924 case AArch64::LD1H_4Z_STRIDED_IMM:
4925 case AArch64::LD1W_4Z_IMM:
4926 case AArch64::LD1W_4Z_STRIDED_IMM:
4927 case AArch64::LD1D_4Z_IMM:
4928 case AArch64::LD1D_4Z_STRIDED_IMM:
4929 case AArch64::LD1B_4Z_IMM_PSEUDO:
4930 case AArch64::LD1H_4Z_IMM_PSEUDO:
4931 case AArch64::LD1W_4Z_IMM_PSEUDO:
4932 case AArch64::LD1D_4Z_IMM_PSEUDO:
4933 Scale = TypeSize::getScalable(64);
4934 Width = TypeSize::getScalable(16 * 4);
4935 MinOffset = -8;
4936 MaxOffset = 7;
4937 break;
4938 case AArch64::LD1B_H_IMM:
4939 case AArch64::LD1SB_H_IMM:
4940 case AArch64::LD1H_S_IMM:
4941 case AArch64::LD1SH_S_IMM:
4942 case AArch64::LD1W_D_IMM:
4943 case AArch64::LD1SW_D_IMM:
4944 case AArch64::ST1B_H_IMM:
4945 case AArch64::ST1H_S_IMM:
4946 case AArch64::ST1W_D_IMM:
4947 case AArch64::LDNF1B_H_IMM:
4948 case AArch64::LDNF1SB_H_IMM:
4949 case AArch64::LDNF1H_S_IMM:
4950 case AArch64::LDNF1SH_S_IMM:
4951 case AArch64::LDNF1W_D_IMM:
4952 case AArch64::LDNF1SW_D_IMM:
4953 // A half vector worth of data
4954 // Width = mbytes * elements
4955 Scale = TypeSize::getScalable(8);
4956 Width = TypeSize::getScalable(8);
4957 MinOffset = -8;
4958 MaxOffset = 7;
4959 break;
4960 case AArch64::LD1B_S_IMM:
4961 case AArch64::LD1SB_S_IMM:
4962 case AArch64::LD1H_D_IMM:
4963 case AArch64::LD1SH_D_IMM:
4964 case AArch64::ST1B_S_IMM:
4965 case AArch64::ST1H_D_IMM:
4966 case AArch64::LDNF1B_S_IMM:
4967 case AArch64::LDNF1SB_S_IMM:
4968 case AArch64::LDNF1H_D_IMM:
4969 case AArch64::LDNF1SH_D_IMM:
4970 // A quarter vector worth of data
4971 // Width = mbytes * elements
4972 Scale = TypeSize::getScalable(4);
4973 Width = TypeSize::getScalable(4);
4974 MinOffset = -8;
4975 MaxOffset = 7;
4976 break;
4977 case AArch64::LD1B_D_IMM:
4978 case AArch64::LD1SB_D_IMM:
4979 case AArch64::ST1B_D_IMM:
4980 case AArch64::LDNF1B_D_IMM:
4981 case AArch64::LDNF1SB_D_IMM:
4982 // A eighth vector worth of data
4983 // Width = mbytes * elements
4984 Scale = TypeSize::getScalable(2);
4985 Width = TypeSize::getScalable(2);
4986 MinOffset = -8;
4987 MaxOffset = 7;
4988 break;
4989 case AArch64::ST2Gi:
4990 case AArch64::ST2GPreIndex:
4991 case AArch64::ST2GPostIndex:
4992 case AArch64::STZ2Gi:
4993 case AArch64::STZ2GPreIndex:
4994 case AArch64::STZ2GPostIndex:
4995 Scale = TypeSize::getFixed(16);
4996 Width = TypeSize::getFixed(32);
4997 MinOffset = -256;
4998 MaxOffset = 255;
4999 break;
5000 case AArch64::STGPi:
5001 case AArch64::STGPpost:
5002 case AArch64::STGPpre:
5003 Scale = TypeSize::getFixed(16);
5004 Width = TypeSize::getFixed(16);
5005 MinOffset = -64;
5006 MaxOffset = 63;
5007 break;
5008 case AArch64::LD1RB_IMM:
5009 case AArch64::LD1RB_H_IMM:
5010 case AArch64::LD1RB_S_IMM:
5011 case AArch64::LD1RB_D_IMM:
5012 case AArch64::LD1RSB_H_IMM:
5013 case AArch64::LD1RSB_S_IMM:
5014 case AArch64::LD1RSB_D_IMM:
5015 Scale = TypeSize::getFixed(1);
5016 Width = TypeSize::getFixed(1);
5017 MinOffset = 0;
5018 MaxOffset = 63;
5019 break;
5020 case AArch64::LD1RH_IMM:
5021 case AArch64::LD1RH_S_IMM:
5022 case AArch64::LD1RH_D_IMM:
5023 case AArch64::LD1RSH_S_IMM:
5024 case AArch64::LD1RSH_D_IMM:
5025 Scale = TypeSize::getFixed(2);
5026 Width = TypeSize::getFixed(2);
5027 MinOffset = 0;
5028 MaxOffset = 63;
5029 break;
5030 case AArch64::LD1RW_IMM:
5031 case AArch64::LD1RW_D_IMM:
5032 case AArch64::LD1RSW_IMM:
5033 Scale = TypeSize::getFixed(4);
5034 Width = TypeSize::getFixed(4);
5035 MinOffset = 0;
5036 MaxOffset = 63;
5037 break;
5038 case AArch64::LD1RD_IMM:
5039 Scale = TypeSize::getFixed(8);
5040 Width = TypeSize::getFixed(8);
5041 MinOffset = 0;
5042 MaxOffset = 63;
5043 break;
5044 }
5045
5046 return true;
5047}
5048
5049// Scaling factor for unscaled load or store.
// Maps the opcode Opc to 1, 2, 4, 8 or 16. The table below lists scaled
// (`*ui`), unscaled (`LDUR*`/`STUR*`), pre-index (`*pre`) and paired (`LDP*`/
// `STP*`) opcodes, as well as STG/STZG/ST2G/STZ2G/STGP; any opcode that is not
// listed reaches llvm_unreachable.
// NOTE(review): the signature line of this definition is absent from the
// listing.
5051 switch (Opc) {
5052 default:
5053 llvm_unreachable("Opcode has unknown scale!");
5054 case AArch64::LDRBui:
5055 case AArch64::LDRBBui:
5056 case AArch64::LDURBBi:
5057 case AArch64::LDRSBWui:
5058 case AArch64::LDURSBWi:
5059 case AArch64::STRBui:
5060 case AArch64::STRBBui:
5061 case AArch64::STURBBi:
5062 return 1;
5063 case AArch64::LDRHui:
5064 case AArch64::LDRHHui:
5065 case AArch64::LDURHHi:
5066 case AArch64::LDRSHWui:
5067 case AArch64::LDURSHWi:
5068 case AArch64::STRHui:
5069 case AArch64::STRHHui:
5070 case AArch64::STURHHi:
5071 return 2;
5072 case AArch64::LDRSui:
5073 case AArch64::LDURSi:
5074 case AArch64::LDRSpre:
5075 case AArch64::LDRSWui:
5076 case AArch64::LDURSWi:
5077 case AArch64::LDRSWpre:
5078 case AArch64::LDRWpre:
5079 case AArch64::LDRWui:
5080 case AArch64::LDURWi:
5081 case AArch64::STRSui:
5082 case AArch64::STURSi:
5083 case AArch64::STRSpre:
5084 case AArch64::STRWui:
5085 case AArch64::STURWi:
5086 case AArch64::STRWpre:
5087 case AArch64::LDPSi:
5088 case AArch64::LDPSWi:
5089 case AArch64::LDPWi:
5090 case AArch64::STPSi:
5091 case AArch64::STPWi:
5092 return 4;
5093 case AArch64::LDRDui:
5094 case AArch64::LDURDi:
5095 case AArch64::LDRDpre:
5096 case AArch64::LDRXui:
5097 case AArch64::LDURXi:
5098 case AArch64::LDRXpre:
5099 case AArch64::STRDui:
5100 case AArch64::STURDi:
5101 case AArch64::STRDpre:
5102 case AArch64::STRXui:
5103 case AArch64::STURXi:
5104 case AArch64::STRXpre:
5105 case AArch64::LDPDi:
5106 case AArch64::LDPXi:
5107 case AArch64::STPDi:
5108 case AArch64::STPXi:
5109 return 8;
5110 case AArch64::LDRQui:
5111 case AArch64::LDURQi:
5112 case AArch64::STRQui:
5113 case AArch64::STURQi:
5114 case AArch64::STRQpre:
5115 case AArch64::LDPQi:
5116 case AArch64::LDRQpre:
5117 case AArch64::STPQi:
5118 case AArch64::STGi:
5119 case AArch64::STZGi:
5120 case AArch64::ST2Gi:
5121 case AArch64::STZ2Gi:
5122 case AArch64::STGPi:
5123 return 16;
5124 }
5125}
5126
 // Returns true when MI's opcode is one of the listed `LDR*pre` opcodes
 // (W, X, SW, S, D, Q) and false for every other opcode.
 // NOTE(review): the signature line of this definition is absent from the
 // listing.
5128 switch (MI.getOpcode()) {
5129 default:
5130 return false;
5131 case AArch64::LDRWpre:
5132 case AArch64::LDRXpre:
5133 case AArch64::LDRSWpre:
5134 case AArch64::LDRSpre:
5135 case AArch64::LDRDpre:
5136 case AArch64::LDRQpre:
5137 return true;
5138 }
5139}
5140
 // Returns true when MI's opcode is one of the listed `STR*pre` opcodes
 // (W, X, S, D, Q) and false for every other opcode.
 // NOTE(review): the signature line of this definition is absent from the
 // listing.
5142 switch (MI.getOpcode()) {
5143 default:
5144 return false;
5145 case AArch64::STRWpre:
5146 case AArch64::STRXpre:
5147 case AArch64::STRSpre:
5148 case AArch64::STRDpre:
5149 case AArch64::STRQpre:
5150 return true;
5151 }
5152}
5153
 // True when MI satisfies either isPreLd or isPreSt.
 // NOTE(review): the signature line of this definition is absent from the
 // listing.
5155 return isPreLd(MI) || isPreSt(MI);
5156}
5157
 // Returns true when MI's opcode is one of the listed LDR{BB,HH,W} loads, in
 // any of their `LDUR*i`, `*ui`, `*roX` or `*roW` spellings; false otherwise.
 // NOTE(review): the signature line is absent from the listing; this is
 // presumably the zero-extending-load predicate — confirm against the header.
5159 switch (MI.getOpcode()) {
5160 default:
5161 return false;
5162 case AArch64::LDURBBi:
5163 case AArch64::LDURHHi:
5164 case AArch64::LDURWi:
5165 case AArch64::LDRBBui:
5166 case AArch64::LDRHHui:
5167 case AArch64::LDRWui:
5168 case AArch64::LDRBBroX:
5169 case AArch64::LDRHHroX:
5170 case AArch64::LDRWroX:
5171 case AArch64::LDRBBroW:
5172 case AArch64::LDRHHroW:
5173 case AArch64::LDRWroW:
5174 return true;
5175 }
5176}
5177
// Opcode-only predicate: true for the LDRS*/LDURS* byte, halfword and word
// loads listed below (W- and X-destination variants, in their LDURS*i,
// LDRS*ui, LDRS*roX and LDRS*roW forms); false for every other opcode.
5179 switch (MI.getOpcode()) {
5180 default:
5181 return false;
5182 case AArch64::LDURSBWi:
5183 case AArch64::LDURSHWi:
5184 case AArch64::LDURSBXi:
5185 case AArch64::LDURSHXi:
5186 case AArch64::LDURSWi:
5187 case AArch64::LDRSBWui:
5188 case AArch64::LDRSHWui:
5189 case AArch64::LDRSBXui:
5190 case AArch64::LDRSHXui:
5191 case AArch64::LDRSWui:
5192 case AArch64::LDRSBWroX:
5193 case AArch64::LDRSHWroX:
5194 case AArch64::LDRSBXroX:
5195 case AArch64::LDRSHXroX:
5196 case AArch64::LDRSWroX:
5197 case AArch64::LDRSBWroW:
5198 case AArch64::LDRSHWroW:
5199 case AArch64::LDRSBXroW:
5200 case AArch64::LDRSHXroW:
5201 case AArch64::LDRSWroW:
5202 return true;
5203 }
5204}
5205
// Opcode-only predicate: true for the LDP*i / STP*i pair opcodes listed
// below and for STGPi; false for every other opcode.
5207 switch (MI.getOpcode()) {
5208 default:
5209 return false;
5210 case AArch64::LDPSi:
5211 case AArch64::LDPSWi:
5212 case AArch64::LDPDi:
5213 case AArch64::LDPQi:
5214 case AArch64::LDPWi:
5215 case AArch64::LDPXi:
5216 case AArch64::STPSi:
5217 case AArch64::STPDi:
5218 case AArch64::STPQi:
5219 case AArch64::STPWi:
5220 case AArch64::STPXi:
5221 case AArch64::STGPi:
5222 return true;
5223 }
5224}
5225
// Returns the operand of MI at position Idx. MI must be an instruction that
// may load or store; this is asserted, not checked in release builds.
5227 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
// Idx is chosen by a conditional expression whose fallback value is 1.
5228 unsigned Idx =
5230 : 1;
5231 return MI.getOperand(Idx);
5232}
5233
// Returns (by const reference) the operand of MI at position Idx. MI must be
// an instruction that may load or store; this is asserted only.
5234const MachineOperand &
5236 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
// Idx is chosen by a conditional expression whose fallback value is 2.
5237 unsigned Idx =
5239 : 2;
5240 return MI.getOperand(Idx);
5241}
5242
// For the register-offset (roX) load opcodes listed below, returns operand 4
// of MI by const reference. Any other opcode reaches llvm_unreachable, so
// callers must only pass one of these opcodes.
5243const MachineOperand &
5245 switch (MI.getOpcode()) {
5246 default:
5247 llvm_unreachable("Unexpected opcode");
5248 case AArch64::LDRBroX:
5249 case AArch64::LDRBBroX:
5250 case AArch64::LDRSBXroX:
5251 case AArch64::LDRSBWroX:
5252 case AArch64::LDRHroX:
5253 case AArch64::LDRHHroX:
5254 case AArch64::LDRSHXroX:
5255 case AArch64::LDRSHWroX:
5256 case AArch64::LDRWroX:
5257 case AArch64::LDRSroX:
5258 case AArch64::LDRSWroX:
5259 case AArch64::LDRDroX:
5260 case AArch64::LDRXroX:
5261 case AArch64::LDRQroX:
5262 return MI.getOperand(4);
5263 }
5264}
5265
5267 Register Reg) {
// Looks up the register class recorded for Reg in the register info of the
// function that contains MI. Yields nullptr when MI is not inserted in a
// basic block, when that block has no parent function, or when
// getRegClassOrNull itself has no class for Reg.
5268 if (MI.getParent() == nullptr)
5269 return nullptr;
5270 const MachineFunction *MF = MI.getParent()->getParent();
5271 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5272}
5273
// True if at least one operand of MI is a register that is either a physical
// register contained in FPR16, or a non-physical register whose recorded
// class is exactly FPR16 or FPR16_lo. Non-register operands never match, and
// a register with no recorded class (nullptr) does not match either.
5275 auto IsHFPR = [&](const MachineOperand &Op) {
5276 if (!Op.isReg())
5277 return false;
5278 auto Reg = Op.getReg();
5279 if (Reg.isPhysical())
5280 return AArch64::FPR16RegClass.contains(Reg);
// Pointer comparison: only these two classes match, not classes related to
// them.
5281 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5282 return TRC == &AArch64::FPR16RegClass ||
5283 TRC == &AArch64::FPR16_loRegClass;
5284 };
5285 return llvm::any_of(MI.operands(), IsHFPR);
5286}
5287
// True if at least one operand of MI is a register that is either a physical
// register contained in FPR128, or a non-physical register whose recorded
// class is exactly FPR128 or FPR128_lo. Non-register operands never match,
// and a register with no recorded class (nullptr) does not match either.
5289 auto IsQFPR = [&](const MachineOperand &Op) {
5290 if (!Op.isReg())
5291 return false;
5292 auto Reg = Op.getReg();
5293 if (Reg.isPhysical())
5294 return AArch64::FPR128RegClass.contains(Reg);
// Pointer comparison: only these two classes match, not classes related to
// them.
5295 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5296 return TRC == &AArch64::FPR128RegClass ||
5297 TRC == &AArch64::FPR128_loRegClass;
5298 };
5299 return llvm::any_of(MI.operands(), IsQFPR);
5300}
5301
// Opcode-based predicate. Answers true for BRK, HLT, PACIASP, PACIBSP and
// PAUTH_PROLOGUE, and for HINT when its immediate (operand 0) is one of
// 32/34/36/38 or 25/27; answers false for everything else.
5303 switch (MI.getOpcode()) {
5304 case AArch64::BRK:
5305 case AArch64::HLT:
5306 case AArch64::PACIASP:
5307 case AArch64::PACIBSP:
5308 // Implicit BTI behavior.
5309 return true;
5310 case AArch64::PAUTH_PROLOGUE:
5311 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5312 return true;
5313 case AArch64::HINT: {
// HINT is generic; its meaning depends on the immediate in operand 0.
5314 unsigned Imm = MI.getOperand(0).getImm();
5315 // Explicit BTI instruction.
5316 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5317 return true;
5318 // PACI(A|B)SP instructions.
5319 if (Imm == 25 || Imm == 27)
5320 return true;
5321 return false;
5322 }
5323 default:
5324 return false;
5325 }
5326}
5327
// Register 0 (no register) is never an FP/NEON register. Any other Reg must
// be physical (asserted); the answer is whether it belongs to one of the
// FPR128, FPR64, FPR32, FPR16 or FPR8 register classes.
5329 if (Reg == 0)
5330 return false;
5331 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5332 return AArch64::FPR128RegClass.contains(Reg) ||
5333 AArch64::FPR64RegClass.contains(Reg) ||
5334 AArch64::FPR32RegClass.contains(Reg) ||
5335 AArch64::FPR16RegClass.contains(Reg) ||
5336 AArch64::FPR8RegClass.contains(Reg);
5337}
5338
// True if at least one operand of MI is an FP/NEON register. Physical
// registers are delegated to the Register overload of isFpOrNEON; other
// registers match only when their recorded class is exactly one of the
// classes compared below. Non-register operands never match.
5340 auto IsFPR = [&](const MachineOperand &Op) {
5341 if (!Op.isReg())
5342 return false;
5343 auto Reg = Op.getReg();
5344 if (Reg.isPhysical())
5345 return isFpOrNEON(Reg);
5346
// Pointer comparison against a fixed list; a null class matches nothing.
// Note the list has _lo variants for FPR128 and FPR64 only.
5347 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5348 return TRC == &AArch64::FPR128RegClass ||
5349 TRC == &AArch64::FPR128_loRegClass ||
5350 TRC == &AArch64::FPR64RegClass ||
5351 TRC == &AArch64::FPR64_loRegClass ||
5352 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5353 TRC == &AArch64::FPR8RegClass;
5354 };
5355 return llvm::any_of(MI.operands(), IsFPR);
5356}
5357
5358// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5359// scaled.
//
// Offset is an in/out parameter: it is divided by the stride only when the
// division is exact, and is left untouched when false is returned.
5360static bool scaleOffset(unsigned Opc, int64_t &Offset) {
5362
5363 // If the byte-offset isn't a multiple of the stride, we can't scale this
5364 // offset.
5365 if (Offset % Scale != 0)
5366 return false;
5367
5368 // Convert the byte-offset used by unscaled into an "element" offset used
5369 // by the scaled pair load/store instructions.
5370 Offset /= Scale;
5371 return true;
5372}
5373
5374static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5375 if (FirstOpc == SecondOpc)
5376 return true;
5377 // We can also pair sign-ext and zero-ext instructions.
5378 switch (FirstOpc) {
5379 default:
5380 return false;
5381 case AArch64::STRSui:
5382 case AArch64::STURSi:
5383 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5384 case AArch64::STRDui:
5385 case AArch64::STURDi:
5386 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5387 case AArch64::STRQui:
5388 case AArch64::STURQi:
5389 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5390 case AArch64::STRWui:
5391 case AArch64::STURWi:
5392 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5393 case AArch64::STRXui:
5394 case AArch64::STURXi:
5395 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5396 case AArch64::LDRSui:
5397 case AArch64::LDURSi:
5398 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5399 case AArch64::LDRDui:
5400 case AArch64::LDURDi:
5401 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5402 case AArch64::LDRQui:
5403 case AArch64::LDURQi:
5404 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5405 case AArch64::LDRWui:
5406 case AArch64::LDURWi:
5407 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5408 case AArch64::LDRSWui:
5409 case AArch64::LDURSWi:
5410 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5411 case AArch64::LDRXui:
5412 case AArch64::LDURXi:
5413 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5414 }
5415 // These instructions can't be paired based on their opcodes.
5416 return false;
5417}
5418
5419static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5420 int64_t Offset1, unsigned Opcode1, int FI2,
5421 int64_t Offset2, unsigned Opcode2) {
5422 // Accesses through fixed stack object frame indices may access a different
5423 // fixed stack slot. Check that the object offsets + offsets match.
5424 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5425 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5426 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5427 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5428 // Convert to scaled object offsets.
5429 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5430 if (ObjectOffset1 % Scale1 != 0)
5431 return false;
5432 ObjectOffset1 /= Scale1;
5433 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5434 if (ObjectOffset2 % Scale2 != 0)
5435 return false;
5436 ObjectOffset2 /= Scale2;
5437 ObjectOffset1 += Offset1;
5438 ObjectOffset2 += Offset2;
5439 return ObjectOffset1 + 1 == ObjectOffset2;
5440 }
5441
5442 return FI1 == FI2;
5443}
5444
5445/// Detect opportunities for ldp/stp formation.
5446///
5447/// Only called for LdSt for which getMemOperandWithOffset returns true.
5449 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5450 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5451 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5452 unsigned NumBytes) const {
// Summary of the visible logic: the two accesses are reported as clusterable
// only if they use the same kind of base operand (and the same register when
// the base is a register), ClusterSize is at most 2, both instructions and
// their opcode combination are pairable, and the second scaled offset is
// exactly one more than the first. Frame-index bases are decided by
// shouldClusterFI instead of the plain adjacency test.
// The offsets used below are re-read from operand 2 of each instruction; the
// OpOffset*, OffsetIsScalable* and NumBytes parameters are not consulted in
// this body.
5453 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5454 const MachineOperand &BaseOp1 = *BaseOps1.front();
5455 const MachineOperand &BaseOp2 = *BaseOps2.front();
5456 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5457 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
5458 if (BaseOp1.getType() != BaseOp2.getType())
5459 return false;
5460
5461 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5462 "Only base registers and frame indices are supported.");
5463
5464 // Check for both base regs and base FI.
5465 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5466 return false;
5467
5468 // Only cluster up to a single pair.
5469 if (ClusterSize > 2)
5470 return false;
5471
5472 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5473 return false;
5474
5475 // Can we pair these instructions based on their opcodes?
5476 unsigned FirstOpc = FirstLdSt.getOpcode();
5477 unsigned SecondOpc = SecondLdSt.getOpcode();
5478 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5479 return false;
5480
5481 // Can't merge volatiles or load/stores that have a hint to avoid pair
5482 // formation, for example.
5483 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5484 !isCandidateToMergeOrPair(SecondLdSt))
5485 return false;
5486
5487 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
// Unscaled opcodes carry byte offsets; convert them to element units so both
// offsets are comparable, and give up if the conversion is not exact.
5488 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5489 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5490 return false;
5491
5492 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5493 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5494 return false;
5495
5496 // Pairwise instructions have a 7-bit signed offset field.
// Only Offset1 is range-checked here.
5497 if (Offset1 > 63 || Offset1 < -64)
5498 return false;
5499
5500 // The caller should already have ordered First/SecondLdSt by offset.
5501 // Note: except for non-equal frame index bases
5502 if (BaseOp1.isFI()) {
5503 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5504 "Caller should have ordered offsets.");
5505
5506 const MachineFrameInfo &MFI =
5507 FirstLdSt.getParent()->getParent()->getFrameInfo();
5508 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5509 BaseOp2.getIndex(), Offset2, SecondOpc);
5510 }
5511
5512 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5513
// Register base: the accesses must be adjacent in scaled units.
5514 return Offset1 + 1 == Offset2;
5515}
5516
5518 MCRegister Reg, unsigned SubIdx,
5519 RegState State,
5520 const TargetRegisterInfo *TRI) {
// Appends a register operand to MIB and returns the result of addReg.
//  - SubIdx == 0: Reg is added unchanged.
//  - physical Reg: the sub-register is resolved through TRI and added
//    directly.
//  - otherwise: Reg is added with SubIdx attached to the operand.
5521 if (!SubIdx)
5522 return MIB.addReg(Reg, State);
5523
5524 if (Reg.isPhysical())
5525 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5526 return MIB.addReg(Reg, State, SubIdx);
5527}
5528
/// Report whether copying a register tuple element by element in ascending
/// order would overwrite a source element before it has been read.
///
/// \param DestReg Encoding of the first destination register.
/// \param SrcReg  Encoding of the first source register.
/// \param NumRegs Number of registers in the tuple.
/// \returns true when the destination starts fewer than \p NumRegs registers
///          after the source, counting with wrap-around modulo 32.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // Unsigned subtraction wraps, so reducing it modulo 32 gives the positive
  // remainder we are after.
  const unsigned Distance = (DestReg - SrcReg) % 32u;
  return Distance < NumRegs;
}
5535
5538 const DebugLoc &DL, MCRegister DestReg,
5539 MCRegister SrcReg, bool KillSrc,
5540 unsigned Opcode,
5541 ArrayRef<unsigned> Indices) const {
// Copies a register tuple one sub-register at a time. For each sub-register
// index in Indices, one Opcode instruction is built before I whose
// destination is the matching sub-register of DestReg and whose two source
// operands are both the matching sub-register of SrcReg.
5542 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5544 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5545 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5546 unsigned NumRegs = Indices.size();
5547
// Default to ascending order; switch to descending order when an ascending
// copy would overwrite part of the source tuple before it is read.
5548 int SubReg = 0, End = NumRegs, Incr = 1;
5549 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5550 SubReg = NumRegs - 1;
5551 End = -1;
5552 Incr = -1;
5553 }
5554
5555 for (; SubReg != End; SubReg += Incr) {
5556 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5557 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
// The source is added twice; only the second use carries the kill state.
5558 AddSubReg(MIB, SrcReg, Indices[SubReg], {}, TRI);
5559 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5560 }
5561}
5562
5565 const DebugLoc &DL, MCRegister DestReg,
5566 MCRegister SrcReg, bool KillSrc,
5567 unsigned Opcode, unsigned ZeroReg,
5568 llvm::ArrayRef<unsigned> Indices) const {
// Copies a GPR tuple one sub-register at a time, always in ascending order.
// For each sub-register index in Indices, one Opcode instruction is built
// before I with operands: destination sub-register, ZeroReg, source
// sub-register, immediate 0.
5570 unsigned NumRegs = Indices.size();
5571
// Debug-only check: both encodings must be multiples of NumRegs.
5572#ifndef NDEBUG
5573 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5574 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5575 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5576 "GPR reg sequences should not be able to overlap");
5577#endif
5578
5579 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5580 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5581 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5582 MIB.addReg(ZeroReg);
5583 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5584 MIB.addImm(0);
5585 }
5586}
5587
5588/// Returns true if the instruction at I is in a streaming call site region,
5589/// within a single basic block.
5590/// A "call site streaming region" starts after smstart and ends at smstop
5591/// around a call to a streaming function. This walks backward from I.
5594 MachineFunction &MF = *MBB.getParent();
// Fast exit: a function without streaming-mode changes has no such regions.
5596 if (!AFI->hasStreamingModeChanges())
5597 return false;
5598 // Walk backwards to find smstart/smstop
// The scan covers the instructions before I, nearest first, and stops at the
// first one that changes SM state: the answer is true if that instruction is
// a start and false if it is a stop.
5599 for (MachineInstr &MI : reverse(make_range(MBB.begin(), I))) {
5600 unsigned Opc = MI.getOpcode();
5601 if (Opc == AArch64::MSRpstatesvcrImm1 || Opc == AArch64::MSRpstatePseudo) {
5602 // Check if this is SM change (not ZA)
5603 int64_t PState = MI.getOperand(0).getImm();
5604 if (PState == AArch64SVCR::SVCRSM || PState == AArch64SVCR::SVCRSMZA) {
5605 // Operand 1 is 1 for start, 0 for stop
5606 return MI.getOperand(1).getImm() == 1;
5607 }
5608 }
5609 }
// No SM state change precedes I in this block: not inside a region.
5610 return false;
5611}
5612
5613/// Returns true if in a streaming call site region without SME-FA64.
5614static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget,
// The subtarget feature is tested first, so the backward scan done by
// isInStreamingCallSiteRegion is skipped whenever SME-FA64 is present.
5617 return !Subtarget.hasSMEFA64() && isInStreamingCallSiteRegion(MBB, I);
5618}
5619
5622 const DebugLoc &DL, Register DestReg,
5623 Register SrcReg, bool KillSrc,
5624 bool RenamableDest,
5625 bool RenamableSrc) const {
5626 ++NumCopyInstrs;
5627 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5628 AArch64::GPR32spRegClass.contains(SrcReg)) {
5629 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5630 // If either operand is WSP, expand to ADD #0.
5631 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5632 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5633 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5634 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5635 &AArch64::GPR64spRegClass);
5636 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5637 &AArch64::GPR64spRegClass);
5638 // This instruction is reading and writing X registers. This may upset
5639 // the register scavenger and machine verifier, so we need to indicate
5640 // that we are reading an undefined value from SrcRegX, but a proper
5641 // value from SrcReg.
5642 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5643 .addReg(SrcRegX, RegState::Undef)
5644 .addImm(0)
5646 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5647 ++NumZCRegMoveInstrsGPR;
5648 } else {
5649 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5650 .addReg(SrcReg, getKillRegState(KillSrc))
5651 .addImm(0)
5653 if (Subtarget.hasZeroCycleRegMoveGPR32())
5654 ++NumZCRegMoveInstrsGPR;
5655 }
5656 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5657 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5658 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5659 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5660 &AArch64::GPR64spRegClass);
5661 assert(DestRegX.isValid() && "Destination super-reg not valid");
5662 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5663 &AArch64::GPR64spRegClass);
5664 assert(SrcRegX.isValid() && "Source super-reg not valid");
5665 // This instruction is reading and writing X registers. This may upset
5666 // the register scavenger and machine verifier, so we need to indicate
5667 // that we are reading an undefined value from SrcRegX, but a proper
5668 // value from SrcReg.
5669 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5670 .addReg(AArch64::XZR)
5671 .addReg(SrcRegX, RegState::Undef)
5672 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5673 ++NumZCRegMoveInstrsGPR;
5674 } else {
5675 // Otherwise, expand to ORR WZR.
5676 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5677 .addReg(AArch64::WZR)
5678 .addReg(SrcReg, getKillRegState(KillSrc));
5679 if (Subtarget.hasZeroCycleRegMoveGPR32())
5680 ++NumZCRegMoveInstrsGPR;
5681 }
5682 return;
5683 }
5684
5685 // GPR32 zeroing
5686 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5687 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5688 !Subtarget.hasZeroCycleZeroingGPR32()) {
5689 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5690 &AArch64::GPR64spRegClass);
5691 assert(DestRegX.isValid() && "Destination super-reg not valid");
5692 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5693 .addImm(0)
5695 ++NumZCZeroingInstrsGPR;
5696 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5697 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5698 .addImm(0)
5700 ++NumZCZeroingInstrsGPR;
5701 } else {
5702 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5703 .addReg(AArch64::WZR)
5704 .addReg(AArch64::WZR);
5705 }
5706 return;
5707 }
5708
5709 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5710 AArch64::GPR64spRegClass.contains(SrcReg)) {
5711 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5712 // If either operand is SP, expand to ADD #0.
5713 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5714 .addReg(SrcReg, getKillRegState(KillSrc))
5715 .addImm(0)
5717 if (Subtarget.hasZeroCycleRegMoveGPR64())
5718 ++NumZCRegMoveInstrsGPR;
5719 } else {
5720 // Otherwise, expand to ORR XZR.
5721 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5722 .addReg(AArch64::XZR)
5723 .addReg(SrcReg, getKillRegState(KillSrc));
5724 if (Subtarget.hasZeroCycleRegMoveGPR64())
5725 ++NumZCRegMoveInstrsGPR;
5726 }
5727 return;
5728 }
5729
5730 // GPR64 zeroing
5731 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5732 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5733 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5734 .addImm(0)
5736 ++NumZCZeroingInstrsGPR;
5737 } else {
5738 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5739 .addReg(AArch64::XZR)
5740 .addReg(AArch64::XZR);
5741 }
5742 return;
5743 }
5744
5745 // Copy a Predicate register by ORRing with itself.
5746 if (AArch64::PPRRegClass.contains(DestReg) &&
5747 AArch64::PPRRegClass.contains(SrcReg)) {
5748 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5749 "Unexpected SVE register.");
5750 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5751 .addReg(SrcReg) // Pg
5752 .addReg(SrcReg)
5753 .addReg(SrcReg, getKillRegState(KillSrc));
5754 return;
5755 }
5756
5757 // Copy a predicate-as-counter register by ORRing with itself as if it
5758 // were a regular predicate (mask) register.
5759 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5760 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5761 if (DestIsPNR || SrcIsPNR) {
5762 auto ToPPR = [](MCRegister R) -> MCRegister {
5763 return (R - AArch64::PN0) + AArch64::P0;
5764 };
5765 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5766 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5767
5768 if (PPRSrcReg != PPRDestReg) {
5769 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5770 .addReg(PPRSrcReg) // Pg
5771 .addReg(PPRSrcReg)
5772 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5773 if (DestIsPNR)
5774 NewMI.addDef(DestReg, RegState::Implicit);
5775 }
5776 return;
5777 }
5778
5779 // Copy a Z register by ORRing with itself.
5780 if (AArch64::ZPRRegClass.contains(DestReg) &&
5781 AArch64::ZPRRegClass.contains(SrcReg)) {
5782 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5783 "Unexpected SVE register.");
5784 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5785 .addReg(SrcReg)
5786 .addReg(SrcReg, getKillRegState(KillSrc));
5787 return;
5788 }
5789
5790 // Copy a Z register pair by copying the individual sub-registers.
5791 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5792 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5793 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5794 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5795 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5796 "Unexpected SVE register.");
5797 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5798 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5799 Indices);
5800 return;
5801 }
5802
5803 // Copy a Z register triple by copying the individual sub-registers.
5804 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5805 AArch64::ZPR3RegClass.contains(SrcReg)) {
5806 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5807 "Unexpected SVE register.");
5808 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5809 AArch64::zsub2};
5810 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5811 Indices);
5812 return;
5813 }
5814
5815 // Copy a Z register quad by copying the individual sub-registers.
5816 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5817 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5818 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5819 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5820 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5821 "Unexpected SVE register.");
5822 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5823 AArch64::zsub2, AArch64::zsub3};
5824 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5825 Indices);
5826 return;
5827 }
5828
5829 // Copy a DDDD register quad by copying the individual sub-registers.
5830 if (AArch64::DDDDRegClass.contains(DestReg) &&
5831 AArch64::DDDDRegClass.contains(SrcReg)) {
5832 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5833 AArch64::dsub2, AArch64::dsub3};
5834 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5835 Indices);
5836 return;
5837 }
5838
5839 // Copy a DDD register triple by copying the individual sub-registers.
5840 if (AArch64::DDDRegClass.contains(DestReg) &&
5841 AArch64::DDDRegClass.contains(SrcReg)) {
5842 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5843 AArch64::dsub2};
5844 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5845 Indices);
5846 return;
5847 }
5848
5849 // Copy a DD register pair by copying the individual sub-registers.
5850 if (AArch64::DDRegClass.contains(DestReg) &&
5851 AArch64::DDRegClass.contains(SrcReg)) {
5852 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5853 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5854 Indices);
5855 return;
5856 }
5857
5858 // Copy a QQQQ register quad by copying the individual sub-registers.
5859 if (AArch64::QQQQRegClass.contains(DestReg) &&
5860 AArch64::QQQQRegClass.contains(SrcReg)) {
5861 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5862 AArch64::qsub2, AArch64::qsub3};
5863 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5864 Indices);
5865 return;
5866 }
5867
5868 // Copy a QQQ register triple by copying the individual sub-registers.
5869 if (AArch64::QQQRegClass.contains(DestReg) &&
5870 AArch64::QQQRegClass.contains(SrcReg)) {
5871 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5872 AArch64::qsub2};
5873 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5874 Indices);
5875 return;
5876 }
5877
5878 // Copy a QQ register pair by copying the individual sub-registers.
5879 if (AArch64::QQRegClass.contains(DestReg) &&
5880 AArch64::QQRegClass.contains(SrcReg)) {
5881 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5882 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5883 Indices);
5884 return;
5885 }
5886
5887 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5888 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5889 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5890 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5891 AArch64::XZR, Indices);
5892 return;
5893 }
5894
5895 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5896 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5897 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5898 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5899 AArch64::WZR, Indices);
5900 return;
5901 }
5902
5903 if (AArch64::FPR128RegClass.contains(DestReg) &&
5904 AArch64::FPR128RegClass.contains(SrcReg)) {
5905 // In streaming regions, NEON is illegal but streaming-SVE is available.
5906 // Use SVE for copies if we're in a streaming region and SME is available.
5907 // With +sme-fa64, NEON is legal in streaming mode so we can use it.
5908 if ((Subtarget.isSVEorStreamingSVEAvailable() &&
5909 !Subtarget.isNeonAvailable()) ||
5910 mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5911 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5912 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5913 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5914 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5915 } else if (Subtarget.isNeonAvailable()) {
5916 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5917 .addReg(SrcReg)
5918 .addReg(SrcReg, getKillRegState(KillSrc));
5919 if (Subtarget.hasZeroCycleRegMoveFPR128())
5920 ++NumZCRegMoveInstrsFPR;
5921 } else {
5922 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5923 .addReg(AArch64::SP, RegState::Define)
5924 .addReg(SrcReg, getKillRegState(KillSrc))
5925 .addReg(AArch64::SP)
5926 .addImm(-16);
5927 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5928 .addReg(AArch64::SP, RegState::Define)
5929 .addReg(DestReg, RegState::Define)
5930 .addReg(AArch64::SP)
5931 .addImm(16);
5932 }
5933 return;
5934 }
5935
5936 if (AArch64::FPR64RegClass.contains(DestReg) &&
5937 AArch64::FPR64RegClass.contains(SrcReg)) {
5938 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5939 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5940 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5941 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5942 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5943 &AArch64::FPR128RegClass);
5944 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5945 &AArch64::FPR128RegClass);
5946 // This instruction is reading and writing Q registers. This may upset
5947 // the register scavenger and machine verifier, so we need to indicate
5948 // that we are reading an undefined value from SrcRegQ, but a proper
5949 // value from SrcReg.
5950 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5951 .addReg(SrcRegQ, RegState::Undef)
5952 .addReg(SrcRegQ, RegState::Undef)
5953 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5954 ++NumZCRegMoveInstrsFPR;
5955 } else {
5956 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5957 .addReg(SrcReg, getKillRegState(KillSrc));
5958 if (Subtarget.hasZeroCycleRegMoveFPR64())
5959 ++NumZCRegMoveInstrsFPR;
5960 }
5961 return;
5962 }
5963
5964 if (AArch64::FPR32RegClass.contains(DestReg) &&
5965 AArch64::FPR32RegClass.contains(SrcReg)) {
5966 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5967 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5968 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5969 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5970 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5971 &AArch64::FPR128RegClass);
5972 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5973 &AArch64::FPR128RegClass);
5974 // This instruction is reading and writing Q registers. This may upset
5975 // the register scavenger and machine verifier, so we need to indicate
5976 // that we are reading an undefined value from SrcRegQ, but a proper
5977 // value from SrcReg.
5978 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5979 .addReg(SrcRegQ, RegState::Undef)
5980 .addReg(SrcRegQ, RegState::Undef)
5981 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5982 ++NumZCRegMoveInstrsFPR;
5983 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5984 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5985 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5986 &AArch64::FPR64RegClass);
5987 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5988 &AArch64::FPR64RegClass);
5989 // This instruction is reading and writing D registers. This may upset
5990 // the register scavenger and machine verifier, so we need to indicate
5991 // that we are reading an undefined value from SrcRegD, but a proper
5992 // value from SrcReg.
5993 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5994 .addReg(SrcRegD, RegState::Undef)
5995 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5996 ++NumZCRegMoveInstrsFPR;
5997 } else {
5998 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5999 .addReg(SrcReg, getKillRegState(KillSrc));
6000 if (Subtarget.hasZeroCycleRegMoveFPR32())
6001 ++NumZCRegMoveInstrsFPR;
6002 }
6003 return;
6004 }
6005
6006 if (AArch64::FPR16RegClass.contains(DestReg) &&
6007 AArch64::FPR16RegClass.contains(SrcReg)) {
6008 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6009 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6010 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6011 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6012 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6013 &AArch64::FPR128RegClass);
6014 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6015 &AArch64::FPR128RegClass);
6016 // This instruction is reading and writing Q registers. This may upset
6017 // the register scavenger and machine verifier, so we need to indicate
6018 // that we are reading an undefined value from SrcRegQ, but a proper
6019 // value from SrcReg.
6020 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6021 .addReg(SrcRegQ, RegState::Undef)
6022 .addReg(SrcRegQ, RegState::Undef)
6023 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6024 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
6025 !Subtarget.hasZeroCycleRegMoveFPR32()) {
6026 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6027 &AArch64::FPR64RegClass);
6028 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6029 &AArch64::FPR64RegClass);
6030 // This instruction is reading and writing D registers. This may upset
6031 // the register scavenger and machine verifier, so we need to indicate
6032 // that we are reading an undefined value from SrcRegD, but a proper
6033 // value from SrcReg.
6034 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
6035 .addReg(SrcRegD, RegState::Undef)
6036 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6037 } else {
6038 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
6039 &AArch64::FPR32RegClass);
6040 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
6041 &AArch64::FPR32RegClass);
6042 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
6043 .addReg(SrcReg, getKillRegState(KillSrc));
6044 }
6045 return;
6046 }
6047
6048 if (AArch64::FPR8RegClass.contains(DestReg) &&
6049 AArch64::FPR8RegClass.contains(SrcReg)) {
6050 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
6051 !Subtarget.hasZeroCycleRegMoveFPR64() &&
6052 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
6053 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
6054 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6055 &AArch64::FPR128RegClass);
6056 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6057 &AArch64::FPR128RegClass);
6058 // This instruction is reading and writing Q registers. This may upset
6059 // the register scavenger and machine verifier, so we need to indicate
6060 // that we are reading an undefined value from SrcRegQ, but a proper
6061 // value from SrcReg.
6062 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
6063 .addReg(SrcRegQ, RegState::Undef)
6064 .addReg(SrcRegQ, RegState::Undef)
6065 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6066 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
6067 !Subtarget.hasZeroCycleRegMoveFPR32()) {
6068 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6069 &AArch64::FPR64RegClass);
6070 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6071 &AArch64::FPR64RegClass);
6072 // This instruction is reading and writing D registers. This may upset
6073 // the register scavenger and machine verifier, so we need to indicate
6074 // that we are reading an undefined value from SrcRegD, but a proper
6075 // value from SrcReg.
6076 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
6077 .addReg(SrcRegD, RegState::Undef)
6078 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
6079 } else {
6080 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
6081 &AArch64::FPR32RegClass);
6082 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
6083 &AArch64::FPR32RegClass);
6084 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
6085 .addReg(SrcReg, getKillRegState(KillSrc));
6086 }
6087 return;
6088 }
6089
6090 // Copies between GPR64 and FPR64.
6091 if (AArch64::FPR64RegClass.contains(DestReg) &&
6092 AArch64::GPR64RegClass.contains(SrcReg)) {
6093 if (AArch64::XZR == SrcReg) {
6094 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
6095 } else {
6096 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
6097 .addReg(SrcReg, getKillRegState(KillSrc));
6098 }
6099 return;
6100 }
6101 if (AArch64::GPR64RegClass.contains(DestReg) &&
6102 AArch64::FPR64RegClass.contains(SrcReg)) {
6103 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
6104 .addReg(SrcReg, getKillRegState(KillSrc));
6105 return;
6106 }
6107 // Copies between GPR32 and FPR32.
6108 if (AArch64::FPR32RegClass.contains(DestReg) &&
6109 AArch64::GPR32RegClass.contains(SrcReg)) {
6110 if (AArch64::WZR == SrcReg) {
6111 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
6112 } else {
6113 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
6114 .addReg(SrcReg, getKillRegState(KillSrc));
6115 }
6116 return;
6117 }
6118 if (AArch64::GPR32RegClass.contains(DestReg) &&
6119 AArch64::FPR32RegClass.contains(SrcReg)) {
6120 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
6121 .addReg(SrcReg, getKillRegState(KillSrc));
6122 return;
6123 }
6124
6125 if (DestReg == AArch64::NZCV) {
6126 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
6127 BuildMI(MBB, I, DL, get(AArch64::MSR))
6128 .addImm(AArch64SysReg::NZCV)
6129 .addReg(SrcReg, getKillRegState(KillSrc))
6130 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
6131 return;
6132 }
6133
6134 if (SrcReg == AArch64::NZCV) {
6135 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
6136 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
6137 .addImm(AArch64SysReg::NZCV)
6138 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
6139 return;
6140 }
6141
6142#ifndef NDEBUG
6143 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
6144 << "\n";
6145#endif
6146 llvm_unreachable("unimplemented reg-to-reg copy");
6147}
6148
6151 MachineBasicBlock::iterator InsertBefore,
6152 const MCInstrDesc &MCID,
6153 Register SrcReg, bool IsKill,
6154 unsigned SubIdx0, unsigned SubIdx1, int FI,
6155 MachineMemOperand *MMO) {
6156 Register SrcReg0 = SrcReg;
6157 Register SrcReg1 = SrcReg;
6158 if (SrcReg.isPhysical()) {
6159 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
6160 SubIdx0 = 0;
6161 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
6162 SubIdx1 = 0;
6163 }
6164 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6165 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
6166 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
6167 .addFrameIndex(FI)
6168 .addImm(0)
6169 .addMemOperand(MMO);
6170}
6171
6174 Register SrcReg, bool isKill, int FI,
6175 const TargetRegisterClass *RC,
6176 Register VReg,
6177 MachineInstr::MIFlag Flags) const {
6178 MachineFunction &MF = *MBB.getParent();
6179 MachineFrameInfo &MFI = MF.getFrameInfo();
6180
6182 MachineMemOperand *MMO =
6184 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6185 unsigned Opc = 0;
6186 bool Offset = true;
6188 unsigned StackID = TargetStackID::Default;
6189 switch (RI.getSpillSize(*RC)) {
6190 case 1:
6191 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6192 Opc = AArch64::STRBui;
6193 break;
6194 case 2: {
6195 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6196 Opc = AArch64::STRHui;
6197 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
6198 AArch64::PPRRegClass.hasSubClassEq(RC)) {
6199 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6200 "Unexpected register store without SVE store instructions");
6201 Opc = AArch64::STR_PXI;
6203 }
6204 break;
6205 }
6206 case 4:
6207 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6208 Opc = AArch64::STRWui;
6209 if (SrcReg.isVirtual())
6210 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
6211 else
6212 assert(SrcReg != AArch64::WSP);
6213 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6214 Opc = AArch64::STRSui;
6215 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6216 Opc = AArch64::STR_PPXI;
6218 }
6219 break;
6220 case 8:
6221 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6222 Opc = AArch64::STRXui;
6223 if (SrcReg.isVirtual())
6224 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6225 else
6226 assert(SrcReg != AArch64::SP);
6227 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6228 Opc = AArch64::STRDui;
6229 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6231 get(AArch64::STPWi), SrcReg, isKill,
6232 AArch64::sube32, AArch64::subo32, FI, MMO);
6233 return;
6234 }
6235 break;
6236 case 16:
6237 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6238 Opc = AArch64::STRQui;
6239 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6240 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6241 Opc = AArch64::ST1Twov1d;
6242 Offset = false;
6243 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6245 get(AArch64::STPXi), SrcReg, isKill,
6246 AArch64::sube64, AArch64::subo64, FI, MMO);
6247 return;
6248 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6249 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6250 "Unexpected register store without SVE store instructions");
6251 Opc = AArch64::STR_ZXI;
6253 }
6254 break;
6255 case 24:
6256 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6257 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6258 Opc = AArch64::ST1Threev1d;
6259 Offset = false;
6260 }
6261 break;
6262 case 32:
6263 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6264 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6265 Opc = AArch64::ST1Fourv1d;
6266 Offset = false;
6267 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6268 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6269 Opc = AArch64::ST1Twov2d;
6270 Offset = false;
6271 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6272 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6273 "Unexpected register store without SVE store instructions");
6274 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
6276 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6277 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6278 "Unexpected register store without SVE store instructions");
6279 Opc = AArch64::STR_ZZXI;
6281 }
6282 break;
6283 case 48:
6284 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6285 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6286 Opc = AArch64::ST1Threev2d;
6287 Offset = false;
6288 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6289 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6290 "Unexpected register store without SVE store instructions");
6291 Opc = AArch64::STR_ZZZXI;
6293 }
6294 break;
6295 case 64:
6296 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6297 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6298 Opc = AArch64::ST1Fourv2d;
6299 Offset = false;
6300 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6301 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6302 "Unexpected register store without SVE store instructions");
6303 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
6305 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6306 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6307 "Unexpected register store without SVE store instructions");
6308 Opc = AArch64::STR_ZZZZXI;
6310 }
6311 break;
6312 }
6313 assert(Opc && "Unknown register class");
6314 MFI.setStackID(FI, StackID);
6315
6317 .addReg(SrcReg, getKillRegState(isKill))
6318 .addFrameIndex(FI);
6319
6320 if (Offset)
6321 MI.addImm(0);
6322 if (PNRReg.isValid())
6323 MI.addDef(PNRReg, RegState::Implicit);
6324 MI.addMemOperand(MMO);
6325}
6326
6329 MachineBasicBlock::iterator InsertBefore,
6330 const MCInstrDesc &MCID,
6331 Register DestReg, unsigned SubIdx0,
6332 unsigned SubIdx1, int FI,
6333 MachineMemOperand *MMO) {
6334 Register DestReg0 = DestReg;
6335 Register DestReg1 = DestReg;
6336 bool IsUndef = true;
6337 if (DestReg.isPhysical()) {
6338 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6339 SubIdx0 = 0;
6340 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6341 SubIdx1 = 0;
6342 IsUndef = false;
6343 }
6344 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6345 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6346 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6347 .addFrameIndex(FI)
6348 .addImm(0)
6349 .addMemOperand(MMO);
6350}
6351
6354 Register DestReg, int FI,
6355 const TargetRegisterClass *RC,
6356 Register VReg, unsigned SubReg,
6357 MachineInstr::MIFlag Flags) const {
6358 MachineFunction &MF = *MBB.getParent();
6359 MachineFrameInfo &MFI = MF.getFrameInfo();
6361 MachineMemOperand *MMO =
6363 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6364
6365 unsigned Opc = 0;
6366 bool Offset = true;
6367 unsigned StackID = TargetStackID::Default;
6369 switch (TRI.getSpillSize(*RC)) {
6370 case 1:
6371 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6372 Opc = AArch64::LDRBui;
6373 break;
6374 case 2: {
6375 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6376 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6377 Opc = AArch64::LDRHui;
6378 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6379 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6380 "Unexpected register load without SVE load instructions");
6381 if (IsPNR)
6382 PNRReg = DestReg;
6383 Opc = AArch64::LDR_PXI;
6385 }
6386 break;
6387 }
6388 case 4:
6389 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6390 Opc = AArch64::LDRWui;
6391 if (DestReg.isVirtual())
6392 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6393 else
6394 assert(DestReg != AArch64::WSP);
6395 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6396 Opc = AArch64::LDRSui;
6397 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6398 Opc = AArch64::LDR_PPXI;
6400 }
6401 break;
6402 case 8:
6403 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6404 Opc = AArch64::LDRXui;
6405 if (DestReg.isVirtual())
6406 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6407 else
6408 assert(DestReg != AArch64::SP);
6409 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6410 Opc = AArch64::LDRDui;
6411 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6413 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6414 AArch64::subo32, FI, MMO);
6415 return;
6416 }
6417 break;
6418 case 16:
6419 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6420 Opc = AArch64::LDRQui;
6421 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6422 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6423 Opc = AArch64::LD1Twov1d;
6424 Offset = false;
6425 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6427 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6428 AArch64::subo64, FI, MMO);
6429 return;
6430 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6431 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6432 "Unexpected register load without SVE load instructions");
6433 Opc = AArch64::LDR_ZXI;
6435 }
6436 break;
6437 case 24:
6438 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6439 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6440 Opc = AArch64::LD1Threev1d;
6441 Offset = false;
6442 }
6443 break;
6444 case 32:
6445 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6446 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6447 Opc = AArch64::LD1Fourv1d;
6448 Offset = false;
6449 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6450 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6451 Opc = AArch64::LD1Twov2d;
6452 Offset = false;
6453 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6454 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6455 "Unexpected register load without SVE load instructions");
6456 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6458 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6459 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6460 "Unexpected register load without SVE load instructions");
6461 Opc = AArch64::LDR_ZZXI;
6463 }
6464 break;
6465 case 48:
6466 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6467 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6468 Opc = AArch64::LD1Threev2d;
6469 Offset = false;
6470 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6471 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6472 "Unexpected register load without SVE load instructions");
6473 Opc = AArch64::LDR_ZZZXI;
6475 }
6476 break;
6477 case 64:
6478 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6479 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6480 Opc = AArch64::LD1Fourv2d;
6481 Offset = false;
6482 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6483 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6484 "Unexpected register load without SVE load instructions");
6485 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6487 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6488 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6489 "Unexpected register load without SVE load instructions");
6490 Opc = AArch64::LDR_ZZZZXI;
6492 }
6493 break;
6494 }
6495
6496 assert(Opc && "Unknown register class");
6497 MFI.setStackID(FI, StackID);
6498
6500 .addReg(DestReg, getDefRegState(true))
6501 .addFrameIndex(FI);
6502 if (Offset)
6503 MI.addImm(0);
6504 if (PNRReg.isValid() && !PNRReg.isVirtual())
6505 MI.addDef(PNRReg, RegState::Implicit);
6506 MI.addMemOperand(MMO);
6507}
6508
6510 const MachineInstr &UseMI,
6511 const TargetRegisterInfo *TRI) {
6512 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6513 UseMI.getIterator()),
6514 [TRI](const MachineInstr &I) {
6515 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6516 I.readsRegister(AArch64::NZCV, TRI);
6517 });
6518}
6519
/// Splits \p Offset into the two components used when describing it in DWARF:
/// \p ByteSized receives the fixed part and \p VGSized receives the scalable
/// part divided by 2. Asserts that the scalable part is even.
6520void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6521 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6522 // The smallest scalable element supported by scaled SVE addressing
6523 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6524 // byte offset must always be a multiple of 2.
6525 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6526
6527 // VGSized offsets are divided by '2', because the VG register is
6528 // the number of 64bit granules as opposed to 128bit vector chunks,
6529 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
6530 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6531 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
6532 ByteSized = Offset.getFixed();
6533 VGSized = Offset.getScalable() / 2;
6534}
6535
6536/// Returns the offset in parts to which this frame offset can be
6537/// decomposed for the purpose of describing a frame offset.
6538/// For non-scalable offsets this is simply its byte size.
///
/// \p NumBytes receives the fixed part of \p Offset. The scalable part is
/// expressed in predicate-vector units (scalable bytes / 2) and is split
/// between \p NumPredicateVectors and \p NumDataVectors, where one data vector
/// counts as 8 predicate vectors. Asserts that the scalable part is even.
6539void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6540 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6541 int64_t &NumDataVectors) {
6542 // The smallest scalable element supported by scaled SVE addressing
6543 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6544 // byte offset must always be a multiple of 2.
6545 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6546
6547 NumBytes = Offset.getFixed();
6548 NumDataVectors = 0;
6549 NumPredicateVectors = Offset.getScalable() / 2;
6550 // This method is used to get the offsets to adjust the frame offset.
6551 // If the function requires ADDPL to be used and needs more than two ADDPL
6552 // instructions, part of the offset is folded into NumDataVectors so that it
6553 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
// The fold is applied when the predicate count is an exact multiple of 8 or
// lies outside the range [-64, 62]. Integer division truncates toward zero,
// so the remainder left in NumPredicateVectors keeps the original sign.
6554 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6555 NumPredicateVectors > 62) {
6556 NumDataVectors = NumPredicateVectors / 8;
6557 NumPredicateVectors -= NumDataVectors * 8;
6558 }
6559}
6560
6561// Convenience function to create a DWARF expression for: Constant `Operation`.
6562// This helper emits compact sequences for common cases. For example, for `-15
6563// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6566 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6567 // -Constant (1 to 31)
6568 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6569 Operation = dwarf::DW_OP_minus;
6570 } else if (Constant >= 0 && Constant <= 31) {
6571 // Literal value 0 to 31
6572 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6573 } else {
6574 // Signed constant
6575 Expr.push_back(dwarf::DW_OP_consts);
6577 }
6578 return Expr.push_back(Operation);
6579}
6580
6581// Convenience function to create a DWARF expression for a register.
6582static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6583 Expr.push_back((char)dwarf::DW_OP_bregx);
6585 Expr.push_back(0);
6586}
6587
6588// Convenience function to create a DWARF expression for loading a register from
6589// a CFA offset.
6591 int64_t OffsetFromDefCFA) {
6592 // This assumes the top of the DWARF stack contains the CFA.
6593 Expr.push_back(dwarf::DW_OP_dup);
6594 // Add the offset to the register.
6595 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6596 // Dereference the address (loads a 64-bit value).
6597 Expr.push_back(dwarf::DW_OP_deref);
6598}
6599
6600// Convenience function to append a comment fragment of the form
6601// (+/-) |NumBytes| (* RegScale)?
// Nothing is appended when NumBytes is zero. NumBytes is taken as int64_t
// because the callers in this file pass 64-bit offsets; a plain `int`
// parameter would silently narrow them before they are printed.
6602static void appendOffsetComment(int64_t NumBytes, llvm::raw_string_ostream &Comment,
6603 StringRef RegScale = {}) {
6604 if (NumBytes) {
// The sign is emitted as its own token, followed by the magnitude.
6605 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6606 if (!RegScale.empty())
6607 Comment << ' ' << RegScale;
6608 }
6609}
6610
6611// Creates an MCCFIInstruction:
6612// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6614 unsigned Reg,
6615 const StackOffset &Offset) {
6616 int64_t NumBytes, NumVGScaledBytes;
6617 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6618 NumVGScaledBytes);
6619 std::string CommentBuffer;
6620 llvm::raw_string_ostream Comment(CommentBuffer);
6621
6622 if (Reg == AArch64::SP)
6623 Comment << "sp";
6624 else if (Reg == AArch64::FP)
6625 Comment << "fp";
6626 else
6627 Comment << printReg(Reg, &TRI);
6628
6629 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6630 SmallString<64> Expr;
6631 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6632 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6633 // Reg + NumBytes
6634 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6635 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6636 appendOffsetComment(NumBytes, Comment);
6637 if (NumVGScaledBytes) {
6638 // + VG * NumVGScaledBytes
6639 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6640 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6641 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6642 Expr.push_back(dwarf::DW_OP_plus);
6643 }
6644
6645 // Wrap this into DW_CFA_def_cfa_expression.
6646 SmallString<64> DefCfaExpr;
6647 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6648 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6649 DefCfaExpr.append(Expr.str());
6650 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6651 Comment.str());
6652}
6653
6655 unsigned FrameReg, unsigned Reg,
6656 const StackOffset &Offset,
6657 bool LastAdjustmentWasScalable) {
6658 if (Offset.getScalable())
6659 return createDefCFAExpression(TRI, Reg, Offset);
6660
6661 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6662 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6663
6664 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6665 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6666}
6667
6670 const StackOffset &OffsetFromDefCFA,
6671 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6672 int64_t NumBytes, NumVGScaledBytes;
6673 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6674 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6675
6676 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6677
6678 // Non-scalable offsets can use DW_CFA_offset directly.
6679 if (!NumVGScaledBytes)
6680 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6681
6682 std::string CommentBuffer;
6683 llvm::raw_string_ostream Comment(CommentBuffer);
6684 Comment << printReg(Reg, &TRI) << " @ cfa";
6685
6686 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6687 assert(NumVGScaledBytes && "Expected scalable offset");
6688 SmallString<64> OffsetExpr;
6689 // + VG * NumVGScaledBytes
6690 StringRef VGRegScale;
6691 if (IncomingVGOffsetFromDefCFA) {
6692 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6693 VGRegScale = "* IncomingVG";
6694 } else {
6695 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6696 VGRegScale = "* VG";
6697 }
6698 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6699 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6700 OffsetExpr.push_back(dwarf::DW_OP_plus);
6701 if (NumBytes) {
6702 // + NumBytes
6703 appendOffsetComment(NumBytes, Comment);
6704 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6705 }
6706
6707 // Wrap this into DW_CFA_expression
6708 SmallString<64> CfaExpr;
6709 CfaExpr.push_back(dwarf::DW_CFA_expression);
6710 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6711 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6712 CfaExpr.append(OffsetExpr.str());
6713
6714 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6715 Comment.str());
6716}
6717
6718// Helper function to emit a frame offset adjustment from a given
6719// pointer (SrcReg), stored into DestReg. This function is explicit
6720// in that it requires the opcode.
6723 const DebugLoc &DL, unsigned DestReg,
6724 unsigned SrcReg, int64_t Offset, unsigned Opc,
6725 const TargetInstrInfo *TII,
6726 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6727 bool *HasWinCFI, bool EmitCFAOffset,
6728 StackOffset CFAOffset, unsigned FrameReg) {
6729 int Sign = 1;
6730 unsigned MaxEncoding, ShiftSize;
6731 switch (Opc) {
6732 case AArch64::ADDXri:
6733 case AArch64::ADDSXri:
6734 case AArch64::SUBXri:
6735 case AArch64::SUBSXri:
6736 MaxEncoding = 0xfff;
6737 ShiftSize = 12;
6738 break;
6739 case AArch64::ADDVL_XXI:
6740 case AArch64::ADDPL_XXI:
6741 case AArch64::ADDSVL_XXI:
6742 case AArch64::ADDSPL_XXI:
6743 MaxEncoding = 31;
6744 ShiftSize = 0;
6745 if (Offset < 0) {
6746 MaxEncoding = 32;
6747 Sign = -1;
6748 Offset = -Offset;
6749 }
6750 break;
6751 default:
6752 llvm_unreachable("Unsupported opcode");
6753 }
6754
6755 // `Offset` can be in bytes or in "scalable bytes".
6756 int VScale = 1;
6757 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6758 VScale = 16;
6759 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6760 VScale = 2;
6761
6762 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6763 // scratch register. If DestReg is a virtual register, use it as the
6764 // scratch register; otherwise, create a new virtual register (to be
6765 // replaced by the scavenger at the end of PEI). That case can be optimized
6766 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6767 // register can be loaded with offset%8 and the add/sub can use an extending
6768 // instruction with LSL#3.
6769 // Currently the function handles any offsets but generates a poor sequence
6770 // of code.
6771 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6772
6773 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6774 Register TmpReg = DestReg;
6775 if (TmpReg == AArch64::XZR)
6776 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6777 &AArch64::GPR64RegClass);
6778 do {
6779 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6780 unsigned LocalShiftSize = 0;
6781 if (ThisVal > MaxEncoding) {
6782 ThisVal = ThisVal >> ShiftSize;
6783 LocalShiftSize = ShiftSize;
6784 }
6785 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6786 "Encoding cannot handle value that big");
6787
6788 Offset -= ThisVal << LocalShiftSize;
6789 if (Offset == 0)
6790 TmpReg = DestReg;
6791 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6792 .addReg(SrcReg)
6793 .addImm(Sign * (int)ThisVal);
6794 if (ShiftSize)
6795 MBI = MBI.addImm(
6797 MBI = MBI.setMIFlag(Flag);
6798
6799 auto Change =
6800 VScale == 1
6801 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6802 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6803 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6804 CFAOffset += Change;
6805 else
6806 CFAOffset -= Change;
6807 if (EmitCFAOffset && DestReg == TmpReg) {
6808 MachineFunction &MF = *MBB.getParent();
6809 const TargetSubtargetInfo &STI = MF.getSubtarget();
6810 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6811
6812 unsigned CFIIndex = MF.addFrameInst(
6813 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6814 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6815 .addCFIIndex(CFIIndex)
6816 .setMIFlags(Flag);
6817 }
6818
6819 if (NeedsWinCFI) {
6820 int Imm = (int)(ThisVal << LocalShiftSize);
6821 if (VScale != 1 && DestReg == AArch64::SP) {
6822 if (HasWinCFI)
6823 *HasWinCFI = true;
6824 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6825 .addImm(ThisVal)
6826 .setMIFlag(Flag);
6827 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6828 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6829 assert(VScale == 1 && "Expected non-scalable operation");
6830 if (HasWinCFI)
6831 *HasWinCFI = true;
6832 if (Imm == 0)
6833 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6834 else
6835 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6836 .addImm(Imm)
6837 .setMIFlag(Flag);
6838 assert(Offset == 0 && "Expected remaining offset to be zero to "
6839 "emit a single SEH directive");
6840 } else if (DestReg == AArch64::SP) {
6841 assert(VScale == 1 && "Expected non-scalable operation");
6842 if (HasWinCFI)
6843 *HasWinCFI = true;
6844 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6845 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6846 .addImm(Imm)
6847 .setMIFlag(Flag);
6848 }
6849 }
6850
6851 SrcReg = TmpReg;
6852 } while (Offset);
6853}
6854
6857 unsigned DestReg, unsigned SrcReg,
6859 MachineInstr::MIFlag Flag, bool SetNZCV,
6860 bool NeedsWinCFI, bool *HasWinCFI,
6861 bool EmitCFAOffset, StackOffset CFAOffset,
6862 unsigned FrameReg) {
6863 // If a function is marked as arm_locally_streaming, then the runtime value of
6864 // vscale in the prologue/epilogue is different from the runtime value of vscale
6865 // in the function's body. To avoid having to consider multiple vscales,
6866 // we can use `addsvl` to allocate any scalable stack-slots, which under
6867 // most circumstances will be only locals, not callee-save slots.
6868 const Function &F = MBB.getParent()->getFunction();
6869 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6870
6871 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6872 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6873 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6874
6875 // Insert ADDSXri for scalable offset at the end.
6876 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6877 if (NeedsFinalDefNZCV)
6878 SetNZCV = false;
6879
6880 // First emit non-scalable frame offsets, or a simple 'mov'.
6881 if (Bytes || (!Offset && SrcReg != DestReg)) {
6882 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6883 "SP increment/decrement not 8-byte aligned");
6884 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6885 if (Bytes < 0) {
6886 Bytes = -Bytes;
6887 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6888 }
6889 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6890 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6891 FrameReg);
6892 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6893 ? StackOffset::getFixed(-Bytes)
6894 : StackOffset::getFixed(Bytes);
6895 SrcReg = DestReg;
6896 FrameReg = DestReg;
6897 }
6898
6899 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6900 "WinCFI can't allocate fractions of an SVE data vector");
6901
6902 if (NumDataVectors) {
6903 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6904 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6905 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6906 FrameReg);
6907 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6908 SrcReg = DestReg;
6909 }
6910
6911 if (NumPredicateVectors) {
6912 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6913 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6914 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6915 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6916 FrameReg);
6917 }
6918
6919 if (NeedsFinalDefNZCV)
6920 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6921 .addReg(DestReg)
6922 .addImm(0)
6923 .addImm(0);
6924}
6925
6928 int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS,
6929 VirtRegMap *VRM) const {
6931 // This is a bit of a hack. Consider this instruction:
6932 //
6933 // %0 = COPY %sp; GPR64all:%0
6934 //
6935 // We explicitly chose GPR64all for the virtual register so such a copy might
6936 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6937 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6938 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6939 //
6940 // To prevent that, we are going to constrain the %0 register class here.
6941 if (MI.isFullCopy()) {
6942 Register DstReg = MI.getOperand(0).getReg();
6943 Register SrcReg = MI.getOperand(1).getReg();
6944 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6945 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6946 return nullptr;
6947 }
6948 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6949 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6950 return nullptr;
6951 }
6952 // Nothing can be folded with a copy from/to NZCV.
6953 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6954 return nullptr;
6955 }
6956
6957 // Handle the case where a copy is being spilled or filled but the source
6958 // and destination register class don't match. For example:
6959 //
6960 // %0 = COPY %xzr; GPR64common:%0
6961 //
6962 // In this case we can still safely fold away the COPY and generate the
6963 // following spill code:
6964 //
6965 // STRXui %xzr, %stack.0
6966 //
6967 // This also eliminates spilled cross register class COPYs (e.g. between x and
6968 // d regs) of the same size. For example:
6969 //
6970 // %0 = COPY %1; GPR64:%0, FPR64:%1
6971 //
6972 // will be filled as
6973 //
6974 // LDRDui %0, fi<#0>
6975 //
6976 // instead of
6977 //
6978 // LDRXui %Temp, fi<#0>
6979 // %0 = FMOV %Temp
6980 //
6981 if (MI.isCopy() && Ops.size() == 1 &&
6982 // Make sure we're only folding the explicit COPY defs/uses.
6983 (Ops[0] == 0 || Ops[0] == 1)) {
6984 bool IsSpill = Ops[0] == 0;
6985 bool IsFill = !IsSpill;
6987 const MachineRegisterInfo &MRI = MF.getRegInfo();
6988 MachineBasicBlock &MBB = *MI.getParent();
6989 const MachineOperand &DstMO = MI.getOperand(0);
6990 const MachineOperand &SrcMO = MI.getOperand(1);
6991 Register DstReg = DstMO.getReg();
6992 Register SrcReg = SrcMO.getReg();
6993 // This is slightly expensive to compute for physical regs since
6994 // getMinimalPhysRegClass is slow.
6995 auto getRegClass = [&](unsigned Reg) {
6996 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6997 : TRI.getMinimalPhysRegClass(Reg);
6998 };
6999
7000 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
7001 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
7002 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
7003 "Mismatched register size in non subreg COPY");
7004 if (IsSpill)
7005 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
7006 getRegClass(SrcReg), Register());
7007 else
7008 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
7009 getRegClass(DstReg), Register());
7010 return &*--InsertPt;
7011 }
7012
7013 // Handle cases like spilling def of:
7014 //
7015 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
7016 //
7017 // where the physical register source can be widened and stored to the full
7018 // virtual reg destination stack slot, in this case producing:
7019 //
7020 // STRXui %xzr, %stack.0
7021 //
7022 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
7023 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
7024 assert(SrcMO.getSubReg() == 0 &&
7025 "Unexpected subreg on physical register");
7026 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
7027 FrameIndex, &AArch64::GPR64RegClass, Register());
7028 return &*--InsertPt;
7029 }
7030
7031 // Handle cases like filling use of:
7032 //
7033 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
7034 //
7035 // where we can load the full virtual reg source stack slot, into the subreg
7036 // destination, in this case producing:
7037 //
7038 // LDRWui %0:sub_32<def,read-undef>, %stack.0
7039 //
7040 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
7041 const TargetRegisterClass *FillRC = nullptr;
7042 switch (DstMO.getSubReg()) {
7043 default:
7044 break;
7045 case AArch64::sub_32:
7046 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
7047 FillRC = &AArch64::GPR32RegClass;
7048 break;
7049 case AArch64::ssub:
7050 FillRC = &AArch64::FPR32RegClass;
7051 break;
7052 case AArch64::dsub:
7053 FillRC = &AArch64::FPR64RegClass;
7054 break;
7055 }
7056
7057 if (FillRC) {
7058 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
7059 TRI.getRegSizeInBits(*FillRC) &&
7060 "Mismatched regclass size on folded subreg COPY");
7061 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
7062 Register());
7063 MachineInstr &LoadMI = *--InsertPt;
7064 MachineOperand &LoadDst = LoadMI.getOperand(0);
7065 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
7066 LoadDst.setSubReg(DstMO.getSubReg());
7067 LoadDst.setIsUndef();
7068 return &LoadMI;
7069 }
7070 }
7071 }
7072
7073 // Cannot fold.
7074 return nullptr;
7075}
7076
7078 StackOffset &SOffset,
7079 bool *OutUseUnscaledOp,
7080 unsigned *OutUnscaledOp,
7081 int64_t *EmittableOffset) {
7082 // Set output values in case of early exit.
7083 if (EmittableOffset)
7084 *EmittableOffset = 0;
7085 if (OutUseUnscaledOp)
7086 *OutUseUnscaledOp = false;
7087 if (OutUnscaledOp)
7088 *OutUnscaledOp = 0;
7089
7090 // Exit early for structured vector spills/fills as they can't take an
7091 // immediate offset.
7092 switch (MI.getOpcode()) {
7093 default:
7094 break;
7095 case AArch64::LD1Rv1d:
7096 case AArch64::LD1Rv2s:
7097 case AArch64::LD1Rv2d:
7098 case AArch64::LD1Rv4h:
7099 case AArch64::LD1Rv4s:
7100 case AArch64::LD1Rv8b:
7101 case AArch64::LD1Rv8h:
7102 case AArch64::LD1Rv16b:
7103 case AArch64::LD1Twov2d:
7104 case AArch64::LD1Threev2d:
7105 case AArch64::LD1Fourv2d:
7106 case AArch64::LD1Twov1d:
7107 case AArch64::LD1Threev1d:
7108 case AArch64::LD1Fourv1d:
7109 case AArch64::ST1Twov2d:
7110 case AArch64::ST1Threev2d:
7111 case AArch64::ST1Fourv2d:
7112 case AArch64::ST1Twov1d:
7113 case AArch64::ST1Threev1d:
7114 case AArch64::ST1Fourv1d:
7115 case AArch64::ST1i8:
7116 case AArch64::ST1i16:
7117 case AArch64::ST1i32:
7118 case AArch64::ST1i64:
7119 case AArch64::IRG:
7120 case AArch64::IRGstack:
7121 case AArch64::STGloop:
7122 case AArch64::STZGloop:
7124 }
7125
7126 // Get the min/max offset and the scale.
7127 TypeSize ScaleValue(0U, false), Width(0U, false);
7128 int64_t MinOff, MaxOff;
7129 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
7130 MaxOff))
7131 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
7132
7133 // Construct the complete offset.
7134 bool IsMulVL = ScaleValue.isScalable();
7135 unsigned Scale = ScaleValue.getKnownMinValue();
7136 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
7137
7138 const MachineOperand &ImmOpnd =
7139 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
7140 Offset += ImmOpnd.getImm() * Scale;
7141
7142 // If the offset doesn't match the scale, we rewrite the instruction to
7143 // use the unscaled instruction instead. Likewise, if we have a negative
7144 // offset and there is an unscaled op to use.
7145 std::optional<unsigned> UnscaledOp =
7147 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
7148 if (useUnscaledOp &&
7149 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
7150 MaxOff))
7151 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
7152
7153 Scale = ScaleValue.getKnownMinValue();
7154 assert(IsMulVL == ScaleValue.isScalable() &&
7155 "Unscaled opcode has different value for scalable");
7156
7157 int64_t Remainder = Offset % Scale;
7158 assert(!(Remainder && useUnscaledOp) &&
7159 "Cannot have remainder when using unscaled op");
7160
7161 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
7162 int64_t NewOffset = Offset / Scale;
7163 if (MinOff <= NewOffset && NewOffset <= MaxOff)
7164 Offset = Remainder;
7165 else {
7166 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
7167 Offset = Offset - (NewOffset * Scale);
7168 }
7169
7170 if (EmittableOffset)
7171 *EmittableOffset = NewOffset;
7172 if (OutUseUnscaledOp)
7173 *OutUseUnscaledOp = useUnscaledOp;
7174 if (OutUnscaledOp && UnscaledOp)
7175 *OutUnscaledOp = *UnscaledOp;
7176
7177 if (IsMulVL)
7178 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
7179 else
7180 SOffset = StackOffset::get(Offset, SOffset.getScalable());
7182 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
7183}
7184
7186 unsigned FrameReg, StackOffset &Offset,
7187 const AArch64InstrInfo *TII) {
7188 unsigned Opcode = MI.getOpcode();
7189 unsigned ImmIdx = FrameRegIdx + 1;
7190
7191 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
7192 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
7193 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
7194 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
7195 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
7196 MI.eraseFromParent();
7197 Offset = StackOffset();
7198 return true;
7199 }
7200
7201 int64_t NewOffset;
7202 unsigned UnscaledOp;
7203 bool UseUnscaledOp;
7204 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
7205 &UnscaledOp, &NewOffset);
7208 // Replace the FrameIndex with FrameReg.
7209 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
7210 if (UseUnscaledOp)
7211 MI.setDesc(TII->get(UnscaledOp));
7212
7213 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
7214 return !Offset;
7215 }
7216
7217 return false;
7218}
7219
7225
7226MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); }
7227
7228// AArch64 supports MachineCombiner.
7229bool AArch64InstrInfo::useMachineCombiner() const { return true; }
7230
7231// True when Opc sets flag
7232static bool isCombineInstrSettingFlag(unsigned Opc) {
7233 switch (Opc) {
7234 case AArch64::ADDSWrr:
7235 case AArch64::ADDSWri:
7236 case AArch64::ADDSXrr:
7237 case AArch64::ADDSXri:
7238 case AArch64::SUBSWrr:
7239 case AArch64::SUBSXrr:
7240 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7241 case AArch64::SUBSWri:
7242 case AArch64::SUBSXri:
7243 return true;
7244 default:
7245 break;
7246 }
7247 return false;
7248}
7249
7250// 32b Opcodes that can be combined with a MUL
7251static bool isCombineInstrCandidate32(unsigned Opc) {
7252 switch (Opc) {
7253 case AArch64::ADDWrr:
7254 case AArch64::ADDWri:
7255 case AArch64::SUBWrr:
7256 case AArch64::ADDSWrr:
7257 case AArch64::ADDSWri:
7258 case AArch64::SUBSWrr:
7259 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7260 case AArch64::SUBWri:
7261 case AArch64::SUBSWri:
7262 return true;
7263 default:
7264 break;
7265 }
7266 return false;
7267}
7268
7269// 64b Opcodes that can be combined with a MUL
7270static bool isCombineInstrCandidate64(unsigned Opc) {
7271 switch (Opc) {
7272 case AArch64::ADDXrr:
7273 case AArch64::ADDXri:
7274 case AArch64::SUBXrr:
7275 case AArch64::ADDSXrr:
7276 case AArch64::ADDSXri:
7277 case AArch64::SUBSXrr:
7278 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7279 case AArch64::SUBXri:
7280 case AArch64::SUBSXri:
7281 case AArch64::ADDv8i8:
7282 case AArch64::ADDv16i8:
7283 case AArch64::ADDv4i16:
7284 case AArch64::ADDv8i16:
7285 case AArch64::ADDv2i32:
7286 case AArch64::ADDv4i32:
7287 case AArch64::SUBv8i8:
7288 case AArch64::SUBv16i8:
7289 case AArch64::SUBv4i16:
7290 case AArch64::SUBv8i16:
7291 case AArch64::SUBv2i32:
7292 case AArch64::SUBv4i32:
7293 return true;
7294 default:
7295 break;
7296 }
7297 return false;
7298}
7299
7300// FP opcodes that can be combined with an FMUL.
//
// Returns false for every opcode other than the scalar and vector FADD/FSUB
// forms listed in the switch. For the listed opcodes the result depends on
// whether fusion is permitted (see the comment above the return).
//
// NOTE(review): in this listing the definition of `Options` and the second
// operand of the `||` in the return statement are not visible; confirm the
// exact condition against the full source.
7301static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
7302 switch (Inst.getOpcode()) {
7303 default:
     // Not an FADD/FSUB form: fall out of the switch to the `return false`.
7304 break;
7305 case AArch64::FADDHrr:
7306 case AArch64::FADDSrr:
7307 case AArch64::FADDDrr:
7308 case AArch64::FADDv4f16:
7309 case AArch64::FADDv8f16:
7310 case AArch64::FADDv2f32:
7311 case AArch64::FADDv2f64:
7312 case AArch64::FADDv4f32:
7313 case AArch64::FSUBHrr:
7314 case AArch64::FSUBSrr:
7315 case AArch64::FSUBDrr:
7316 case AArch64::FSUBv4f16:
7317 case AArch64::FSUBv8f16:
7318 case AArch64::FSUBv2f32:
7319 case AArch64::FSUBv2f64:
7320 case AArch64::FSUBv4f32:
7322 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7323 // the target options or if FADD/FSUB has the contract fast-math flag.
7324 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7326 }
7327 return false;
7328}
7329
7330// Opcodes that can be combined with a MUL
7334
7335//
7336// Utility routine that checks if \param MO is defined by an
7337// \param CombineOpc instruction in the basic block \param MBB
7339 unsigned CombineOpc, unsigned ZeroReg = 0,
7340 bool CheckZeroReg = false) {
7341 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7342 MachineInstr *MI = nullptr;
7343
7344 if (MO.isReg() && MO.getReg().isVirtual())
7345 MI = MRI.getUniqueVRegDef(MO.getReg());
7346 // And it needs to be in the trace (otherwise, it won't have a depth).
7347 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7348 return false;
7349 // Must only be used by the user we combine with.
7350 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7351 return false;
7352
7353 if (CheckZeroReg) {
7354 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7355 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7356 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
7357 // The third input reg must be zero.
7358 if (MI->getOperand(3).getReg() != ZeroReg)
7359 return false;
7360 }
7361
7362 if (isCombineInstrSettingFlag(CombineOpc) &&
7363 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7364 return false;
7365
7366 return true;
7367}
7368
7369//
7370// Is \param MO defined by an integer multiply and can be combined?
7372 unsigned MulOpc, unsigned ZeroReg) {
7373 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7374}
7375
7376//
7377// Is \param MO defined by a floating-point multiply and can be combined?
7379 unsigned MulOpc) {
7380 return canCombine(MBB, MO, MulOpc);
7381}
7382
7383// TODO: There are many more machine instruction opcodes to match:
7384// 1. Other data types (integer, vectors)
7385// 2. Other math / logic operations (xor, or)
7386// 3. Other forms of the same operation (intrinsics and other variants)
//
// Reports whether the opcode of \p Inst is in the list of opcodes this target
// treats as both associative and commutative.
//
// \param Inst   The instruction whose opcode is inspected.
// \param Invert When true the function returns false immediately; no inverted
//               forms are handled here.
// \returns true for the integer opcodes listed below, false for any opcode
//          not in the switch.
//
// NOTE(review): in this listing the statement that terminates the
// floating-point case group is not visible (the listing jumps from 7427 to
// 7430); confirm the condition applied to the FP opcodes against the full
// source.
7387bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7388 bool Invert) const {
7389 if (Invert)
7390 return false;
7391 switch (Inst.getOpcode()) {
7392 // == Floating-point types ==
7393 // -- Floating-point instructions --
7394 case AArch64::FADDHrr:
7395 case AArch64::FADDSrr:
7396 case AArch64::FADDDrr:
7397 case AArch64::FMULHrr:
7398 case AArch64::FMULSrr:
7399 case AArch64::FMULDrr:
7400 case AArch64::FMULX16:
7401 case AArch64::FMULX32:
7402 case AArch64::FMULX64:
7403 // -- Advanced SIMD instructions --
7404 case AArch64::FADDv4f16:
7405 case AArch64::FADDv8f16:
7406 case AArch64::FADDv2f32:
7407 case AArch64::FADDv4f32:
7408 case AArch64::FADDv2f64:
7409 case AArch64::FMULv4f16:
7410 case AArch64::FMULv8f16:
7411 case AArch64::FMULv2f32:
7412 case AArch64::FMULv4f32:
7413 case AArch64::FMULv2f64:
7414 case AArch64::FMULXv4f16:
7415 case AArch64::FMULXv8f16:
7416 case AArch64::FMULXv2f32:
7417 case AArch64::FMULXv4f32:
7418 case AArch64::FMULXv2f64:
7419 // -- SVE instructions --
7420 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7421 // in the SVE instruction set (though there are predicated ones).
7422 case AArch64::FADD_ZZZ_H:
7423 case AArch64::FADD_ZZZ_S:
7424 case AArch64::FADD_ZZZ_D:
7425 case AArch64::FMUL_ZZZ_H:
7426 case AArch64::FMUL_ZZZ_S:
7427 case AArch64::FMUL_ZZZ_D:
7430
7431 // == Integer types ==
7432 // -- Base instructions --
7433 // Opcodes MULWrr and MULXrr don't exist because
7434 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7435 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7436 // The machine combiner does not support machine instructions with three
7437 // source operands, so we cannot reassociate MULs.
7438 case AArch64::ADDWrr:
7439 case AArch64::ADDXrr:
7440 case AArch64::ANDWrr:
7441 case AArch64::ANDXrr:
7442 case AArch64::ORRWrr:
7443 case AArch64::ORRXrr:
7444 case AArch64::EORWrr:
7445 case AArch64::EORXrr:
7446 case AArch64::EONWrr:
7447 case AArch64::EONXrr:
7448 // -- Advanced SIMD instructions --
7449 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7450 // in the Advanced SIMD instruction set.
7451 case AArch64::ADDv8i8:
7452 case AArch64::ADDv16i8:
7453 case AArch64::ADDv4i16:
7454 case AArch64::ADDv8i16:
7455 case AArch64::ADDv2i32:
7456 case AArch64::ADDv4i32:
7457 case AArch64::ADDv1i64:
7458 case AArch64::ADDv2i64:
7459 case AArch64::MULv8i8:
7460 case AArch64::MULv16i8:
7461 case AArch64::MULv4i16:
7462 case AArch64::MULv8i16:
7463 case AArch64::MULv2i32:
7464 case AArch64::MULv4i32:
7465 case AArch64::ANDv8i8:
7466 case AArch64::ANDv16i8:
7467 case AArch64::ORRv8i8:
7468 case AArch64::ORRv16i8:
7469 case AArch64::EORv8i8:
7470 case AArch64::EORv16i8:
7471 // -- SVE instructions --
7472 case AArch64::ADD_ZZZ_B:
7473 case AArch64::ADD_ZZZ_H:
7474 case AArch64::ADD_ZZZ_S:
7475 case AArch64::ADD_ZZZ_D:
7476 case AArch64::MUL_ZZZ_B:
7477 case AArch64::MUL_ZZZ_H:
7478 case AArch64::MUL_ZZZ_S:
7479 case AArch64::MUL_ZZZ_D:
7480 case AArch64::AND_ZZZ:
7481 case AArch64::ORR_ZZZ:
7482 case AArch64::EOR_ZZZ:
     // All integer opcodes listed above are accepted unconditionally.
7483 return true;
7484
7485 default:
     // Any opcode not listed is not treated as associative and commutative.
7486 return false;
7487 }
7488}
7489
7490/// Find instructions that can be turned into madd.
7492 SmallVectorImpl<unsigned> &Patterns) {
7493 unsigned Opc = Root.getOpcode();
7494 MachineBasicBlock &MBB = *Root.getParent();
7495 bool Found = false;
7496
7498 return false;
7500 int Cmp_NZCV =
7501 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7502 // When NZCV is live bail out.
7503 if (Cmp_NZCV == -1)
7504 return false;
7505 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7506 // When opcode can't change bail out.
7507 // CHECKME: do we miss any cases for opcode conversion?
7508 if (NewOpc == Opc)
7509 return false;
7510 Opc = NewOpc;
7511 }
7512
7513 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7514 unsigned Pattern) {
7515 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7516 Patterns.push_back(Pattern);
7517 Found = true;
7518 }
7519 };
7520
7521 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7522 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7523 Patterns.push_back(Pattern);
7524 Found = true;
7525 }
7526 };
7527
7529
7530 switch (Opc) {
7531 default:
7532 break;
7533 case AArch64::ADDWrr:
7534 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7535 "ADDWrr does not have register operands");
7536 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7537 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7538 break;
7539 case AArch64::ADDXrr:
7540 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7541 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7542 break;
7543 case AArch64::SUBWrr:
7544 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7545 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7546 break;
7547 case AArch64::SUBXrr:
7548 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7549 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7550 break;
7551 case AArch64::ADDWri:
7552 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7553 break;
7554 case AArch64::ADDXri:
7555 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7556 break;
7557 case AArch64::SUBWri:
7558 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7559 break;
7560 case AArch64::SUBXri:
7561 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7562 break;
7563 case AArch64::ADDv8i8:
7564 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7565 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7566 break;
7567 case AArch64::ADDv16i8:
7568 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7569 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7570 break;
7571 case AArch64::ADDv4i16:
7572 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7573 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7574 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7575 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7576 break;
7577 case AArch64::ADDv8i16:
7578 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7579 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7580 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7581 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7582 break;
7583 case AArch64::ADDv2i32:
7584 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7585 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7586 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7587 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7588 break;
7589 case AArch64::ADDv4i32:
7590 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7591 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7592 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7593 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7594 break;
7595 case AArch64::SUBv8i8:
7596 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7597 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7598 break;
7599 case AArch64::SUBv16i8:
7600 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7601 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7602 break;
7603 case AArch64::SUBv4i16:
7604 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7605 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7606 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7607 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7608 break;
7609 case AArch64::SUBv8i16:
7610 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7611 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7612 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7613 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7614 break;
7615 case AArch64::SUBv2i32:
7616 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7617 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7618 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7619 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7620 break;
7621 case AArch64::SUBv4i32:
7622 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7623 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7624 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7625 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7626 break;
7627 }
7628 return Found;
7629}
7630
7631bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7632 switch (Opcode) {
7633 default:
7634 break;
7635 case AArch64::UABALB_ZZZ_D:
7636 case AArch64::UABALB_ZZZ_H:
7637 case AArch64::UABALB_ZZZ_S:
7638 case AArch64::UABALT_ZZZ_D:
7639 case AArch64::UABALT_ZZZ_H:
7640 case AArch64::UABALT_ZZZ_S:
7641 case AArch64::SABALB_ZZZ_D:
7642 case AArch64::SABALB_ZZZ_S:
7643 case AArch64::SABALB_ZZZ_H:
7644 case AArch64::SABALT_ZZZ_D:
7645 case AArch64::SABALT_ZZZ_S:
7646 case AArch64::SABALT_ZZZ_H:
7647 case AArch64::UABALv16i8_v8i16:
7648 case AArch64::UABALv2i32_v2i64:
7649 case AArch64::UABALv4i16_v4i32:
7650 case AArch64::UABALv4i32_v2i64:
7651 case AArch64::UABALv8i16_v4i32:
7652 case AArch64::UABALv8i8_v8i16:
7653 case AArch64::UABAv16i8:
7654 case AArch64::UABAv2i32:
7655 case AArch64::UABAv4i16:
7656 case AArch64::UABAv4i32:
7657 case AArch64::UABAv8i16:
7658 case AArch64::UABAv8i8:
7659 case AArch64::SABALv16i8_v8i16:
7660 case AArch64::SABALv2i32_v2i64:
7661 case AArch64::SABALv4i16_v4i32:
7662 case AArch64::SABALv4i32_v2i64:
7663 case AArch64::SABALv8i16_v4i32:
7664 case AArch64::SABALv8i8_v8i16:
7665 case AArch64::SABAv16i8:
7666 case AArch64::SABAv2i32:
7667 case AArch64::SABAv4i16:
7668 case AArch64::SABAv4i32:
7669 case AArch64::SABAv8i16:
7670 case AArch64::SABAv8i8:
7671 return true;
7672 }
7673
7674 return false;
7675}
7676
7677unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7678 unsigned AccumulationOpcode) const {
7679 switch (AccumulationOpcode) {
7680 default:
7681 llvm_unreachable("Unsupported accumulation Opcode!");
7682 case AArch64::UABALB_ZZZ_D:
7683 return AArch64::UABDLB_ZZZ_D;
7684 case AArch64::UABALB_ZZZ_H:
7685 return AArch64::UABDLB_ZZZ_H;
7686 case AArch64::UABALB_ZZZ_S:
7687 return AArch64::UABDLB_ZZZ_S;
7688 case AArch64::UABALT_ZZZ_D:
7689 return AArch64::UABDLT_ZZZ_D;
7690 case AArch64::UABALT_ZZZ_H:
7691 return AArch64::UABDLT_ZZZ_H;
7692 case AArch64::UABALT_ZZZ_S:
7693 return AArch64::UABDLT_ZZZ_S;
7694 case AArch64::UABALv16i8_v8i16:
7695 return AArch64::UABDLv16i8_v8i16;
7696 case AArch64::UABALv2i32_v2i64:
7697 return AArch64::UABDLv2i32_v2i64;
7698 case AArch64::UABALv4i16_v4i32:
7699 return AArch64::UABDLv4i16_v4i32;
7700 case AArch64::UABALv4i32_v2i64:
7701 return AArch64::UABDLv4i32_v2i64;
7702 case AArch64::UABALv8i16_v4i32:
7703 return AArch64::UABDLv8i16_v4i32;
7704 case AArch64::UABALv8i8_v8i16:
7705 return AArch64::UABDLv8i8_v8i16;
7706 case AArch64::UABAv16i8:
7707 return AArch64::UABDv16i8;
7708 case AArch64::UABAv2i32:
7709 return AArch64::UABDv2i32;
7710 case AArch64::UABAv4i16:
7711 return AArch64::UABDv4i16;
7712 case AArch64::UABAv4i32:
7713 return AArch64::UABDv4i32;
7714 case AArch64::UABAv8i16:
7715 return AArch64::UABDv8i16;
7716 case AArch64::UABAv8i8:
7717 return AArch64::UABDv8i8;
7718 case AArch64::SABALB_ZZZ_D:
7719 return AArch64::SABDLB_ZZZ_D;
7720 case AArch64::SABALB_ZZZ_S:
7721 return AArch64::SABDLB_ZZZ_S;
7722 case AArch64::SABALB_ZZZ_H:
7723 return AArch64::SABDLB_ZZZ_H;
7724 case AArch64::SABALT_ZZZ_D:
7725 return AArch64::SABDLT_ZZZ_D;
7726 case AArch64::SABALT_ZZZ_S:
7727 return AArch64::SABDLT_ZZZ_S;
7728 case AArch64::SABALT_ZZZ_H:
7729 return AArch64::SABDLT_ZZZ_H;
7730 case AArch64::SABALv16i8_v8i16:
7731 return AArch64::SABDLv16i8_v8i16;
7732 case AArch64::SABALv2i32_v2i64:
7733 return AArch64::SABDLv2i32_v2i64;
7734 case AArch64::SABALv4i16_v4i32:
7735 return AArch64::SABDLv4i16_v4i32;
7736 case AArch64::SABALv4i32_v2i64:
7737 return AArch64::SABDLv4i32_v2i64;
7738 case AArch64::SABALv8i16_v4i32:
7739 return AArch64::SABDLv8i16_v4i32;
7740 case AArch64::SABALv8i8_v8i16:
7741 return AArch64::SABDLv8i8_v8i16;
7742 case AArch64::SABAv16i8:
7743 return AArch64::SABDv16i8;
7744 case AArch64::SABAv2i32:
7745 return AArch64::SABAv2i32;
7746 case AArch64::SABAv4i16:
7747 return AArch64::SABDv4i16;
7748 case AArch64::SABAv4i32:
7749 return AArch64::SABDv4i32;
7750 case AArch64::SABAv8i16:
7751 return AArch64::SABDv8i16;
7752 case AArch64::SABAv8i8:
7753 return AArch64::SABDv8i8;
7754 }
7755}
7756
7757/// Floating-Point Support
7758
7759/// Find instructions that can be turned into madd.
7761 SmallVectorImpl<unsigned> &Patterns) {
7762
7763 if (!isCombineInstrCandidateFP(Root))
7764 return false;
7765
7766 MachineBasicBlock &MBB = *Root.getParent();
7767 bool Found = false;
7768
7769 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7770 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7771 Patterns.push_back(Pattern);
7772 return true;
7773 }
7774 return false;
7775 };
7776
7778
7779 switch (Root.getOpcode()) {
7780 default:
7781 assert(false && "Unsupported FP instruction in combiner\n");
7782 break;
7783 case AArch64::FADDHrr:
7784 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7785 "FADDHrr does not have register operands");
7786
7787 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7788 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7789 break;
7790 case AArch64::FADDSrr:
7791 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7792 "FADDSrr does not have register operands");
7793
7794 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7795 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7796
7797 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7798 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7799 break;
7800 case AArch64::FADDDrr:
7801 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7802 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7803
7804 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7805 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7806 break;
7807 case AArch64::FADDv4f16:
7808 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7809 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7810
7811 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7812 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7813 break;
7814 case AArch64::FADDv8f16:
7815 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7816 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7817
7818 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7819 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7820 break;
7821 case AArch64::FADDv2f32:
7822 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7823 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7824
7825 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7826 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7827 break;
7828 case AArch64::FADDv2f64:
7829 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7830 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7831
7832 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7833 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7834 break;
7835 case AArch64::FADDv4f32:
7836 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7837 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7838
7839 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7840 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7841 break;
7842 case AArch64::FSUBHrr:
7843 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7844 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7845 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7846 break;
7847 case AArch64::FSUBSrr:
7848 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7849
7850 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7851 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7852
7853 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7854 break;
7855 case AArch64::FSUBDrr:
7856 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7857
7858 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7859 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7860
7861 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7862 break;
7863 case AArch64::FSUBv4f16:
7864 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7865 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7866
7867 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7868 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7869 break;
7870 case AArch64::FSUBv8f16:
7871 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7872 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7873
7874 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7875 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7876 break;
7877 case AArch64::FSUBv2f32:
7878 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7879 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7880
7881 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7882 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7883 break;
7884 case AArch64::FSUBv2f64:
7885 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7886 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7887
7888 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7889 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7890 break;
7891 case AArch64::FSUBv4f32:
7892 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7893 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7894
7895 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7896 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7897 break;
7898 }
7899 return Found;
7900}
7901
7903 SmallVectorImpl<unsigned> &Patterns) {
// Purpose: for a vector FMUL Root, record an FMUL*_indexed pattern for each
// source operand that is produced by the corresponding DUP-lane instruction.
// Returns true when at least one pattern was appended to Patterns, and false
// for any Root opcode not listed in the switch below.
7904 MachineBasicBlock &MBB = *Root.getParent();
7905 bool Found = false;
7906
// Match(Opcode, Operand, Pattern): appends Pattern to Patterns and returns
// true when operand number Operand of Root is a virtual register whose unique
// definition has opcode Opcode. A single intervening COPY whose source is a
// virtual register is looked through before the opcode comparison.
7907 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7908 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7909 MachineOperand &MO = Root.getOperand(Operand);
7910 MachineInstr *MI = nullptr;
7911 if (MO.isReg() && MO.getReg().isVirtual())
7912 MI = MRI.getUniqueVRegDef(MO.getReg());
7913 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7914 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7915 MI->getOperand(1).getReg().isVirtual())
7916 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7917 if (MI && MI->getOpcode() == Opcode) {
7918 Patterns.push_back(Pattern);
7919 return true;
7920 }
7921 return false;
7922 };
7923
7925
// Both source operands (1 and 2) are always tested: the second test uses `|=`
// on a separate statement, so up to two patterns can be recorded per Root.
7926 switch (Root.getOpcode()) {
7927 default:
7928 return false;
7929 case AArch64::FMULv2f32:
7930 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7931 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7932 break;
7933 case AArch64::FMULv2f64:
7934 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7935 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7936 break;
7937 case AArch64::FMULv4f16:
7938 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7939 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7940 break;
7941 case AArch64::FMULv4f32:
7942 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7943 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7944 break;
7945 case AArch64::FMULv8f16:
7946 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7947 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7948 break;
7949 }
7950
7951 return Found;
7952}
7953
7955 SmallVectorImpl<unsigned> &Patterns) {
// Purpose: recognise an FNEG (FNEGDr / FNEGSr) whose source is produced by the
// same-width FMADD (FMADDDrrr / FMADDSrrr) and record the FNMADD combiner
// pattern. Returns true only when the pattern was appended to Patterns.
7956 unsigned Opc = Root.getOpcode();
7957 MachineBasicBlock &MBB = *Root.getParent();
7958 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7959
// Match(Opcode, Pattern): inspects the instruction feeding operand 1 of Root.
// NOTE(review): the declaration of MI and part of the condition are on lines
// that are not present in this listing. The visible checks require MI to be
// non-null, to have opcode Opcode, to have a result with exactly one
// non-debug use, and to carry the FmNsz flag -- confirm the remaining
// conditions against the full source.
7960 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7961 MachineOperand &MO = Root.getOperand(1);
7963 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7964 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7968 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7969 Patterns.push_back(Pattern);
7970 return true;
7971 }
7972 return false;
7973 };
7974
// Only the scalar double and single precision FNEG forms are handled; every
// other opcode falls through to `return false`.
7975 switch (Opc) {
7976 default:
7977 break;
7978 case AArch64::FNEGDr:
7979 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7980 case AArch64::FNEGSr:
7981 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7982 }
7983
7984 return false;
7985}
7986
7987/// Return true when a code sequence can improve throughput. It
7988/// should be called only for instructions in loops.
7989/// \param Pattern - combiner pattern
/// \returns true for the patterns listed as case labels in the switch, and
/// false for every other pattern (the default case breaks out of the switch).
/// NOTE(review): the signature and the case labels are not present in this
/// listing; consult the full source for the exact set of patterns.
7991 switch (Pattern) {
7992 default:
7993 break;
8099 return true;
8100 } // end switch (Pattern)
8101 return false;
8102}
8103
8104/// Find other MI combine patterns.
/// Handles the register-register SUB/SUBS roots (W and X forms) whose operand
/// 2 is accepted by canCombine() as an ADDWrr, ADDSWrr, ADDXrr or ADDSXrr,
/// i.e. the shape A - (B + C).
/// \returns true when Root is such a candidate, false otherwise.
8106 SmallVectorImpl<unsigned> &Patterns) {
8107 // A - (B + C) ==> (A - B) - C or (A - C) - B
8108 unsigned Opc = Root.getOpcode();
8109 MachineBasicBlock &MBB = *Root.getParent();
8110
// Reject anything that is not one of the four SUB forms up front.
8111 switch (Opc) {
8112 case AArch64::SUBWrr:
8113 case AArch64::SUBSWrr:
8114 case AArch64::SUBXrr:
8115 case AArch64::SUBSXrr:
8116 // Found candidate root.
8117 break;
8118 default:
8119 return false;
8120 }
8121
// NOTE(review): the first line of this condition is not present in this
// listing. The visible part bails out when Root has no NZCV def operand for
// which the third argument (true) holds -- confirm the full condition and the
// meaning of that argument against the complete source.
8123 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
8124 -1)
8125 return false;
8126
// The same operand (2) is tested against every ADD flavour; the lines that
// append to Patterns inside this branch are not present in this listing.
8127 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
8128 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
8129 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
8130 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
8133 return true;
8134 }
8135
8136 return false;
8137}
8138
8139/// Check if the given instruction forms a gather load pattern that can be
8140/// optimized for better Memory-Level Parallelism (MLP). This function
8141/// identifies chains of NEON lane load instructions that load data from
8142/// different memory addresses into individual lanes of a 128-bit vector
8143/// register, then attempts to split the pattern into parallel loads to break
8144/// the serial dependency between instructions.
8145///
8146/// Pattern Matched:
8147/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
8148/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
8149///
8150/// Transformed Into:
8151/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
8152/// to combine the results, enabling better memory-level parallelism.
8153///
8154/// Supported Element Types:
8155/// - 32-bit elements (LD1i32, 4 lanes total)
8156/// - 16-bit elements (LD1i16, 8 lanes total)
8157/// - 8-bit elements (LD1i8, 16 lanes total)
///
/// \param Root candidate lane load; must write lane NumLanes - 1.
/// \param Patterns receives the matched pattern (the push_back lines are not
/// present in this listing).
/// \param LoadLaneOpCode opcode every lane load in the chain must have.
/// \param NumLanes number of lanes in the 128-bit vector; must be 4, 8 or 16.
/// \returns true when the whole chain matches and no barrier was found while
/// scanning backwards from Root.
8159 SmallVectorImpl<unsigned> &Patterns,
8160 unsigned LoadLaneOpCode, unsigned NumLanes) {
8161 const MachineFunction *MF = Root.getMF();
8162
8163 // Early exit if optimizing for size.
8164 if (MF->getFunction().hasMinSize())
8165 return false;
8166
8167 const MachineRegisterInfo &MRI = MF->getRegInfo();
8169
8170 // The root of the pattern must load into the last lane of the vector.
8171 if (Root.getOperand(2).getImm() != NumLanes - 1)
8172 return false;
8173
8174 // Check that we have load into all lanes except lane 0.
8175 // For each load we also want to check that:
8176 // 1. It has a single non-debug use (since we will be replacing the virtual
8177 // register)
8178 // 2. That the addressing mode only uses a single pointer operand
// RemainingLanes starts as {1, ..., NumLanes - 2}: lane NumLanes - 1 is Root
// itself (checked above) and lane 0 is handled via SUBREG_TO_REG below.
// The walk follows operand 1 (the vector being inserted into) from each lane
// load back to its unique defining instruction.
8179 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8180 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
8181 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
8183 while (!RemainingLanes.empty() && CurrInstr &&
8184 CurrInstr->getOpcode() == LoadLaneOpCode &&
8185 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
8186 CurrInstr->getNumOperands() == 4) {
8187 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
8188 LoadInstrs.push_back(CurrInstr);
8189 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8190 }
8191
8192 // Check that we have found a match for lanes N-1.. 1.
8193 if (!RemainingLanes.empty())
8194 return false;
8195
8196 // Match the SUBREG_TO_REG sequence.
// NOTE(review): CurrInstr is dereferenced without a null check here; this
// relies on getUniqueVRegDef() having returned an instruction for the last
// lane load's operand 1 -- confirm that this always holds at this point.
8197 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
8198 return false;
8199
8200 // Verify that the subreg to reg loads an integer into the first lane.
8201 auto Lane0LoadReg = CurrInstr->getOperand(1).getReg();
8202 unsigned SingleLaneSizeInBits = 128 / NumLanes;
8203 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
8204 return false;
8205
8206 // Verify that it also has a single non debug use.
8207 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
8208 return false;
8209
8210 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
8211
8212 // If there is any chance of aliasing, do not apply the pattern.
8213 // Walk backward through the MBB starting from Root.
8214 // Exit early if we've encountered all load instructions or hit the search
8215 // limit.
8216 auto MBBItr = Root.getIterator();
8217 unsigned RemainingSteps = GatherOptSearchLimit;
8218 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
8219 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
8220 const MachineBasicBlock *MBB = Root.getParent();
8221
// The loop stops before visiting MBB->begin() itself, so the first
// instruction of the block is never examined by this scan.
8222 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
8223 !RemainingLoadInstrs.empty();
8224 --MBBItr, --RemainingSteps) {
8225 const MachineInstr &CurrInstr = *MBBItr;
8226
8227 // Remove this instruction from remaining loads if it's one we're tracking.
8228 RemainingLoadInstrs.erase(&CurrInstr);
8229
8230 // Check for potential aliasing with any of the load instructions to
8231 // optimize.
8232 if (CurrInstr.isLoadFoldBarrier())
8233 return false;
8234 }
8235
8236 // If we hit the search limit without finding all load instructions,
8237 // don't match the pattern.
8238 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
8239 return false;
8240
// One pattern per supported lane count; the statement inside each case is
// not present in this listing.
8241 switch (NumLanes) {
8242 case 4:
8244 break;
8245 case 8:
8247 break;
8248 case 16:
8250 break;
8251 default:
8252 llvm_unreachable("Got bad number of lanes for gather pattern.");
8253 }
8254
8255 return true;
8256}
8257
8258/// Search for patterns of LD instructions we can optimize.
/// Dispatches on the opcode of Root: LD1i32, LD1i16 and LD1i8 are forwarded to
/// getGatherLanePattern() with 4, 8 and 16 lanes respectively; any other
/// opcode yields false.
/// \returns the result of getGatherLanePattern() for the handled opcodes.
8260 SmallVectorImpl<unsigned> &Patterns) {
8261
8262 // The pattern searches for loads into single lanes.
8263 switch (Root.getOpcode()) {
8264 case AArch64::LD1i32:
8265 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
8266 case AArch64::LD1i16:
8267 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
8268 case AArch64::LD1i8:
8269 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
8270 default:
8271 return false;
8272 }
8273}
8274
8275/// Generate optimized instruction sequence for gather load patterns to improve
8276/// Memory-Level Parallelism (MLP). This function transforms a chain of
8277/// sequential NEON lane loads into parallel vector loads that can execute
8278/// concurrently.
///
/// The new instructions are appended to InsInstrs in order and the replaced
/// lane loads are appended to DelInstrs; Root itself is deliberately not added
/// to DelInstrs. Every newly created virtual register is recorded in
/// InstrIdxForVirtReg together with the index of its defining instruction in
/// InsInstrs.
/// NOTE(review): part of the parameter list is not present in this listing.
8279static void
8283 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8284 unsigned Pattern, unsigned NumLanes) {
8285 MachineFunction &MF = *Root.getParent()->getParent();
8286 MachineRegisterInfo &MRI = MF.getRegInfo();
8288
8289 // Gather the initial load instructions to build the pattern.
// Collects Root plus its NumLanes - 2 predecessors along operand 1, i.e. the
// NumLanes - 1 lane loads; CurrInstr ends up at the instruction that defines
// the first lane load's input vector.
8290 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
8291 MachineInstr *CurrInstr = &Root;
8292 for (unsigned i = 0; i < NumLanes - 1; ++i) {
8293 LoadToLaneInstrs.push_back(CurrInstr);
8294 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8295 }
8296
8297 // Sort the load instructions according to the lane.
// Descending lane order: the lane-0 scalar load is appended afterwards, so
// the reversed view below is in ascending lane order.
8298 llvm::sort(LoadToLaneInstrs,
8299 [](const MachineInstr *A, const MachineInstr *B) {
8300 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
8301 });
8302
8303 MachineInstr *SubregToReg = CurrInstr;
8304 LoadToLaneInstrs.push_back(
8305 MRI.getUniqueVRegDef(SubregToReg->getOperand(1).getReg()));
8306 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
8307
8308 const TargetRegisterClass *FPR128RegClass =
8309 MRI.getRegClass(Root.getOperand(0).getReg());
8310
8311 // Helper lambda to create a LD1 instruction.
// Builds a lane load with Root's opcode into a fresh virtual register of
// Root's result register class, reusing the memory operands and debug
// metadata of OriginalInstr, and returns the new register.
8312 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
8313 Register SrcRegister, unsigned Lane,
8314 Register OffsetRegister,
8315 bool OffsetRegisterKillState) {
8316 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8317 MachineInstrBuilder LoadIndexIntoRegister =
8318 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8319 NewRegister)
8320 .addReg(SrcRegister)
8321 .addImm(Lane)
8322 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState))
8323 .setMemRefs(OriginalInstr->memoperands());
8324 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8325 InsInstrs.push_back(LoadIndexIntoRegister);
8326 return NewRegister;
8327 };
8328
8329 // Helper to create load instruction based on the NumLanes in the NEON
8330 // register we are rewriting.
// Selects LDRSui / LDRHui / LDRBui for 4 / 8 / 16 lanes. The caller is
// responsible for inserting the returned builder into InsInstrs.
8331 auto CreateLDRInstruction =
8332 [&](unsigned NumLanes, Register DestReg, Register OffsetReg,
8334 unsigned Opcode;
8335 switch (NumLanes) {
8336 case 4:
8337 Opcode = AArch64::LDRSui;
8338 break;
8339 case 8:
8340 Opcode = AArch64::LDRHui;
8341 break;
8342 case 16:
8343 Opcode = AArch64::LDRBui;
8344 break;
8345 default:
8347 "Got unsupported number of lanes in machine-combiner gather pattern");
8348 }
8349 // Immediate offset load
8350 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8351 .addReg(OffsetReg)
8352 .addImm(0)
8353 .setMemRefs(MMOs);
8354 };
8355
8356 // Load the remaining lanes into register 0.
// Entries 1 .. NumLanes/2 - 1 of the ascending list are re-emitted on top of
// the existing SUBREG_TO_REG result, keeping lane numbers 1 .. NumLanes/2 - 1.
8357 auto LanesToLoadToReg0 =
8358 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8359 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8360 Register PrevReg = SubregToReg->getOperand(0).getReg();
8361 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8362 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8363 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8364 OffsetRegOperand.getReg(),
8365 OffsetRegOperand.isKill());
8366 DelInstrs.push_back(LoadInstr);
8367 }
8368 Register LastLoadReg0 = PrevReg;
8369
8370 // First load into register 1. Perform an integer load to zero out the upper
8371 // lanes in a single instruction.
// OriginalSplitLoad is entry NumLanes/2 of the ascending list; it is replaced
// by a scalar load whose destination uses the register class of the original
// lane-0 load's result.
8372 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
8373 MachineInstr *OriginalSplitLoad =
8374 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8375 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8376 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8377
8378 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8379 OriginalSplitLoad->getOperand(3);
8380 MachineInstrBuilder MiddleIndexLoadInstr =
8381 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8382 OriginalSplitToLoadOffsetOperand.getReg(),
8383 OriginalSplitLoad->memoperands());
8384
8385 InstrIdxForVirtReg.insert(
8386 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8387 InsInstrs.push_back(MiddleIndexLoadInstr);
8388 DelInstrs.push_back(OriginalSplitLoad);
8389
8390 // Subreg To Reg instruction for register 1.
8391 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8392 unsigned SubregType;
8393 switch (NumLanes) {
8394 case 4:
8395 SubregType = AArch64::ssub;
8396 break;
8397 case 8:
8398 SubregType = AArch64::hsub;
8399 break;
8400 case 16:
8401 SubregType = AArch64::bsub;
8402 break;
8403 default:
8405 "Got invalid NumLanes for machine-combiner gather pattern");
8406 }
8407
// Reuses the opcode of the original SUBREG_TO_REG; the scalar load result is
// marked killed because this is the only instruction built here that reads it.
8408 auto SubRegToRegInstr =
8409 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8410 DestRegForSubregToReg)
8411 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8412 .addImm(SubregType);
8413 InstrIdxForVirtReg.insert(
8414 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8415 InsInstrs.push_back(SubRegToRegInstr);
8416
8417 // Load remaining lanes into register 1.
// Entries NumLanes/2 + 1 .. NumLanes - 1 of the ascending list are re-emitted
// into lanes 1 .. NumLanes/2 - 1 of the second register.
8418 auto LanesToLoadToReg1 =
8419 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8420 LoadToLaneInstrsAscending.end());
8421 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8422 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8423 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8424 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8425 OffsetRegOperand.getReg(),
8426 OffsetRegOperand.isKill());
8427
8428 // Do not add the last reg to DelInstrs - it will be removed later.
8429 if (Index == NumLanes / 2 - 2) {
8430 break;
8431 }
8432 DelInstrs.push_back(LoadInstr);
8433 }
8434 Register LastLoadReg1 = PrevReg;
8435
8436 // Create the final zip instruction to combine the results.
// The ZIP1 writes Root's original result register, so users of Root are
// unaffected. It defines no new virtual register and is therefore not added
// to InstrIdxForVirtReg.
8437 MachineInstrBuilder ZipInstr =
8438 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8439 Root.getOperand(0).getReg())
8440 .addReg(LastLoadReg0)
8441 .addReg(LastLoadReg1);
8442 InsInstrs.push_back(ZipInstr);
8443}
8444
8458
8459/// Return true when there is potentially a faster code sequence for an
8460/// instruction chain ending in \p Root. All potential patterns are listed in
8461/// the \p Patterns vector. Patterns should be sorted in priority order since
8462/// the pattern evaluator stops checking as soon as it finds a faster sequence.
///
/// The pattern families are tried in a fixed order (integer multiply-add,
/// FMUL, FMA, FNEG, miscellaneous, load) and the first helper that reports a
/// match ends the search. When none matches, the query is forwarded to
/// TargetInstrInfo::getMachineCombinerPatterns().
8463
8464bool AArch64InstrInfo::getMachineCombinerPatterns(
8465 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8466 bool DoRegPressureReduce) const {
8467 // Integer patterns
8468 if (getMaddPatterns(Root, Patterns))
8469 return true;
8470 // Floating point patterns
8471 if (getFMULPatterns(Root, Patterns))
8472 return true;
8473 if (getFMAPatterns(Root, Patterns))
8474 return true;
8475 if (getFNEGPatterns(Root, Patterns))
8476 return true;
8477
8478 // Other patterns
8479 if (getMiscPatterns(Root, Patterns))
8480 return true;
8481
8482 // Load patterns
8483 if (getLoadPatterns(Root, Patterns))
8484 return true;
8485
// DoRegPressureReduce is only consumed by the generic implementation.
8486 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8487 DoRegPressureReduce);
8488}
8489
8491/// genFusedMultiply - Generate fused multiply instructions.
8492/// This function supports both integer and floating point instructions.
8493/// A typical example:
8494/// F|MUL I=A,B,0
8495/// F|ADD R,I,C
8496/// ==> F|MADD R,A,B,C
8497/// \param MF Containing MachineFunction
8498/// \param MRI Register information
8499/// \param TII Target information
8500/// \param Root is the F|ADD instruction
8501/// \param [out] InsInstrs is a vector of machine instructions and will
8502/// contain the generated madd instruction
8503/// \param IdxMulOpd is index of operand in Root that is the result of
8504/// the F|MUL. In the example above IdxMulOpd is 1.
8505/// \param MaddOpc the opcode of the f|madd instruction
8506/// \param RC Register class of operands
8507/// \param kind of fma instruction (addressing mode) to be generated
8508/// \param ReplacedAddend is the result register from the instruction
8509/// replacing the non-combined operand, if any.
/// \returns the F|MUL instruction that defines operand \p IdxMulOpd of Root.
8510static MachineInstr *
8512 const TargetInstrInfo *TII, MachineInstr &Root,
8513 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8514 unsigned MaddOpc, const TargetRegisterClass *RC,
8516 const Register *ReplacedAddend = nullptr) {
8517 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8518
// The multiplicands and their kill flags come from the MUL; the addend comes
// from the other source operand of Root unless a replacement is supplied.
8519 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8520 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8521 Register ResultReg = Root.getOperand(0).getReg();
8522 Register SrcReg0 = MUL->getOperand(1).getReg();
8523 bool Src0IsKill = MUL->getOperand(1).isKill();
8524 Register SrcReg1 = MUL->getOperand(2).getReg();
8525 bool Src1IsKill = MUL->getOperand(2).isKill();
8526
8527 Register SrcReg2;
8528 bool Src2IsKill;
8529 if (ReplacedAddend) {
8530 // If we just generated a new addend, we must be its only use.
8531 SrcReg2 = *ReplacedAddend;
8532 Src2IsKill = true;
8533 } else {
8534 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8535 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8536 }
8537
// Constrain every virtual register involved to RC; physical registers are
// left untouched.
8538 if (ResultReg.isVirtual())
8539 MRI.constrainRegClass(ResultReg, RC);
8540 if (SrcReg0.isVirtual())
8541 MRI.constrainRegClass(SrcReg0, RC);
8542 if (SrcReg1.isVirtual())
8543 MRI.constrainRegClass(SrcReg1, RC);
8544 if (SrcReg2.isVirtual())
8545 MRI.constrainRegClass(SrcReg2, RC);
8546
// Operand order depends on the kind:
//   Default:     multiplicand, multiplicand, addend
//   Indexed:     addend, multiplicand, multiplicand, lane (MUL operand 3)
//   Accumulator: addend, multiplicand, multiplicand
8548 if (kind == FMAInstKind::Default)
8549 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8550 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8551 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8552 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8553 else if (kind == FMAInstKind::Indexed)
8554 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8555 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8556 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8557 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8558 .addImm(MUL->getOperand(3).getImm());
8559 else if (kind == FMAInstKind::Accumulator)
8560 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8561 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8562 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8563 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8564 else
8565 assert(false && "Invalid FMA instruction kind \n");
8566 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
8567 InsInstrs.push_back(MIB);
8568 return MUL;
8569}
8570
// Builds an FNMADD from the instruction (MAD) that defines operand 1 of Root,
// reusing MAD's three source operands and kill flags and writing Root's
// result register. FNMADDSrrr is used when MAD's result class is within FPR32,
// FNMADDDrrr when it is within FPR64. Returns nullptr without emitting
// anything for any other register class; otherwise appends the new
// instruction to InsInstrs and returns MAD.
// NOTE(review): parts of the signature are not present in this listing.
8571static MachineInstr *
8573 const TargetInstrInfo *TII, MachineInstr &Root,
8575 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8576
8577 unsigned Opc = 0;
8578 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8579 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8580 Opc = AArch64::FNMADDSrrr;
8581 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8582 Opc = AArch64::FNMADDDrrr;
8583 else
8584 return nullptr;
8585
8586 Register ResultReg = Root.getOperand(0).getReg();
8587 Register SrcReg0 = MAD->getOperand(1).getReg();
8588 Register SrcReg1 = MAD->getOperand(2).getReg();
8589 Register SrcReg2 = MAD->getOperand(3).getReg();
8590 bool Src0IsKill = MAD->getOperand(1).isKill();
8591 bool Src1IsKill = MAD->getOperand(2).isKill();
8592 bool Src2IsKill = MAD->getOperand(3).isKill();
// Only virtual registers are constrained to MAD's result register class.
8593 if (ResultReg.isVirtual())
8594 MRI.constrainRegClass(ResultReg, RC);
8595 if (SrcReg0.isVirtual())
8596 MRI.constrainRegClass(SrcReg0, RC);
8597 if (SrcReg1.isVirtual())
8598 MRI.constrainRegClass(SrcReg1, RC);
8599 if (SrcReg2.isVirtual())
8600 MRI.constrainRegClass(SrcReg2, RC);
8601
8603 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8604 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8605 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8606 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8607 InsInstrs.push_back(MIB);
8608
8609 return MAD;
8610}
8611
8612/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
/// \param IdxDupOp index (1 or 2) of the Root operand produced by the DUP.
/// \param MulOpc opcode of the indexed multiply to build.
/// \param RC register class the DUP source register is constrained to.
/// \returns a pointer to Root; the new instruction is appended to InsInstrs.
/// NOTE(review): part of the parameter list is not present in this listing.
8613static MachineInstr *
8616 unsigned IdxDupOp, unsigned MulOpc,
8617 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8618 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8619 "Invalid index of FMUL operand");
8620
8621 MachineFunction &MF = *Root.getMF();
8623
8624 MachineInstr *Dup =
8625 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8626
// Look through one COPY between the DUP and the multiply.
8627 if (Dup->getOpcode() == TargetOpcode::COPY)
8628 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8629
// The DUP source register is used directly by the new instruction (added
// below without any kill state), so its existing kill flags are cleared.
8630 Register DupSrcReg = Dup->getOperand(1).getReg();
8631 MRI.clearKillFlags(DupSrcReg);
8632 MRI.constrainRegClass(DupSrcReg, RC);
8633
8634 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8635
// The other source operand of Root is copied over unchanged.
8636 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8637 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8638
8639 Register ResultReg = Root.getOperand(0).getReg();
8640
8642 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8643 .add(MulOp)
8644 .addReg(DupSrcReg)
8645 .addImm(DupSrcLane);
8646
8647 InsInstrs.push_back(MIB);
8648 return &Root;
8649}
8650
8651/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8652/// instructions.
8653///
/// Thin wrapper that forwards all of its arguments to genFusedMultiply() and
/// returns its result.
/// NOTE(review): the line carrying the FMAInstKind argument and most of the
/// signature are not present in this listing.
///
8654/// \see genFusedMultiply
8658 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8659 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8661}
8662
8663/// genNeg - Helper to generate an intermediate negation of the second operand
8664/// of Root
/// Builds an instruction with opcode \p MnegOpc that takes operand 2 of Root
/// as its only source and defines a fresh virtual register of class \p RC.
/// The instruction is appended to InsInstrs and the register is recorded in
/// InstrIdxForVirtReg with index 0; the assert below requires that map to be
/// empty on entry.
/// \returns the newly created virtual register.
8666 const TargetInstrInfo *TII, MachineInstr &Root,
8668 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8669 unsigned MnegOpc, const TargetRegisterClass *RC) {
8670 Register NewVR = MRI.createVirtualRegister(RC);
8672 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8673 .add(Root.getOperand(2));
8674 InsInstrs.push_back(MIB);
8675
8676 assert(InstrIdxForVirtReg.empty());
8677 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8678
8679 return NewVR;
8680}
8681
8682/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8683/// instructions with an additional negation of the accumulator
/// First emits the negation via genNeg(), then calls genFusedMultiply() with
/// FMAInstKind::Accumulator, passing the negated register as the replacement
/// addend. Only IdxMulOpd == 1 is supported (asserted).
/// \returns the value returned by genFusedMultiply().
8687 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8688 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8689 assert(IdxMulOpd == 1);
8690
8691 Register NewVR =
8692 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8693 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8694 FMAInstKind::Accumulator, &NewVR);
8695}
8696
8697/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8698/// instructions.
8699///
/// Thin wrapper that forwards all of its arguments to genFusedMultiply() and
/// returns its result.
/// NOTE(review): the line carrying the FMAInstKind argument and most of the
/// signature are not present in this listing.
///
8700/// \see genFusedMultiply
8704 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8705 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8707}
8708
8709/// Helper to generate indexed fused multiply accumulate
8710/// instructions with an additional negation of the accumulator
/// First emits the negation via genNeg(), then calls genFusedMultiply() with
/// FMAInstKind::Indexed, passing the negated register as the replacement
/// addend. Only IdxMulOpd == 1 is supported (asserted).
/// NOTE(review): this header used to name genFusedMultiplyAccNeg, but the body
/// uses FMAInstKind::Indexed; the function name itself is on a line that is
/// not present in this listing -- confirm against the full source.
/// \returns the value returned by genFusedMultiply().
8714 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8715 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8716 assert(IdxMulOpd == 1);
8717
8718 Register NewVR =
8719 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8720
8721 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8722 FMAInstKind::Indexed, &NewVR);
8723}
8724
8725/// genMaddR - Generate madd instruction and combine mul and add using
8726/// an extra virtual register
8727/// Example - an ADD intermediate needs to be stored in a register:
8728/// MUL I=A,B,0
8729/// ADD R,I,Imm
8730/// ==> ORR V, ZR, Imm
8731/// ==> MADD R,A,B,V
8732/// \param MF Containing MachineFunction
8733/// \param MRI Register information
8734/// \param TII Target information
8735/// \param Root is the ADD instruction
8736/// \param [out] InsInstrs is a vector of machine instructions and will
8737/// contain the generated madd instruction
8738/// \param IdxMulOpd is index of operand in Root that is the result of
8739/// the MUL. In the example above IdxMulOpd is 1.
8740/// \param MaddOpc the opcode of the madd instruction
8741/// \param VR is a virtual register that holds the value of an ADD operand
8742/// (V in the example above).
8743/// \param RC Register class of operands
/// \returns the MUL instruction that defines operand \p IdxMulOpd of Root.
8745 const TargetInstrInfo *TII, MachineInstr &Root,
8747 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8748 const TargetRegisterClass *RC) {
8749 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8750
// The multiplicands and their kill flags are taken from the MUL.
8751 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8752 Register ResultReg = Root.getOperand(0).getReg();
8753 Register SrcReg0 = MUL->getOperand(1).getReg();
8754 bool Src0IsKill = MUL->getOperand(1).isKill();
8755 Register SrcReg1 = MUL->getOperand(2).getReg();
8756 bool Src1IsKill = MUL->getOperand(2).isKill();
8757
8758 if (ResultReg.isVirtual())
8759 MRI.constrainRegClass(ResultReg, RC);
8760 if (SrcReg0.isVirtual())
8761 MRI.constrainRegClass(SrcReg0, RC);
8762 if (SrcReg1.isVirtual())
8763 MRI.constrainRegClass(SrcReg1, RC);
// NOTE(review): the guard line preceding this statement is not present in
// this listing.
8765 MRI.constrainRegClass(VR, RC);
8766
// VR is added as the addend without any kill state.
8768 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8769 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8770 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8771 .addReg(VR);
8772 // Insert the MADD
8773 InsInstrs.push_back(MIB);
8774 return MUL;
8775}
8776
8777/// Do the following transformation
8778/// A - (B + C) ==> (A - B) - C
8779/// A - (B + C) ==> (A - C) - B
/// \param IdxOpd1 selects which ADD operand (1 or 2) is subtracted first; the
/// other ADD operand is subtracted second.
/// Two SUB instructions are appended to InsInstrs, and both the ADD and Root
/// are appended to DelInstrs. The intermediate register is recorded in
/// InstrIdxForVirtReg with index 0.
/// NOTE(review): parts of the signature and the initialiser of NewVR are not
/// present in this listing.
8781 const TargetInstrInfo *TII, MachineInstr &Root,
8784 unsigned IdxOpd1,
8785 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8786 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8787 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8788 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8789
8790 Register ResultReg = Root.getOperand(0).getReg();
8791 Register RegA = Root.getOperand(1).getReg();
8792 bool RegAIsKill = Root.getOperand(1).isKill();
8793 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8794 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8795 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8796 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8797 Register NewVR =
8799
// Both new instructions use the plain SUB opcode: a SUBS root is mapped to the
// SUB of the same width.
8800 unsigned Opcode = Root.getOpcode();
8801 if (Opcode == AArch64::SUBSWrr)
8802 Opcode = AArch64::SUBWrr;
8803 else if (Opcode == AArch64::SUBSXrr)
8804 Opcode = AArch64::SUBXrr;
8805 else
8806 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8807 "Unexpected instruction opcode.");
8808
// Start from the flags merged from Root and the ADD, then drop the no-wrap
// flags so they are not carried over to the reassociated instructions.
8809 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8810 Flags &= ~MachineInstr::NoSWrap;
8811 Flags &= ~MachineInstr::NoUWrap;
8812
// MIB1 computes A - B into NewVR; MIB2 computes NewVR - C into the original
// result register and kills NewVR.
8813 MachineInstrBuilder MIB1 =
8814 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8815 .addReg(RegA, getKillRegState(RegAIsKill))
8816 .addReg(RegB, getKillRegState(RegBIsKill))
8817 .setMIFlags(Flags);
8818 MachineInstrBuilder MIB2 =
8819 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8820 .addReg(NewVR, getKillRegState(true))
8821 .addReg(RegC, getKillRegState(RegCIsKill))
8822 .setMIFlags(Flags);
8823
8824 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8825 InsInstrs.push_back(MIB1);
8826 InsInstrs.push_back(MIB2);
8827 DelInstrs.push_back(AddMI);
8828 DelInstrs.push_back(&Root);
8829}
8830
// Maps an accumulating opcode (the SVE UABALB/UABALT/SABALB/SABALT forms and
// the NEON UABA/SABA/UABAL/SABAL forms listed below) to the ADD opcode paired
// with it in this switch. Any opcode not listed hits llvm_unreachable, so
// callers must only pass opcodes from this table.
8831unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8832 unsigned int AccumulatorOpCode) const {
8833 switch (AccumulatorOpCode) {
// SVE forms, grouped by the element-size suffix (_D, _H, _S).
8834 case AArch64::UABALB_ZZZ_D:
8835 case AArch64::SABALB_ZZZ_D:
8836 case AArch64::UABALT_ZZZ_D:
8837 case AArch64::SABALT_ZZZ_D:
8838 return AArch64::ADD_ZZZ_D;
8839 case AArch64::UABALB_ZZZ_H:
8840 case AArch64::SABALB_ZZZ_H:
8841 case AArch64::UABALT_ZZZ_H:
8842 case AArch64::SABALT_ZZZ_H:
8843 return AArch64::ADD_ZZZ_H;
8844 case AArch64::UABALB_ZZZ_S:
8845 case AArch64::SABALB_ZZZ_S:
8846 case AArch64::UABALT_ZZZ_S:
8847 case AArch64::SABALT_ZZZ_S:
8848 return AArch64::ADD_ZZZ_S;
// NEON forms. Several result types appear in more than one case group below
// (ADDv8i16, ADDv2i64 and ADDv4i32 are each returned from two groups).
8849 case AArch64::UABALv16i8_v8i16:
8850 case AArch64::SABALv8i8_v8i16:
8851 case AArch64::SABAv8i16:
8852 case AArch64::UABAv8i16:
8853 return AArch64::ADDv8i16;
8854 case AArch64::SABALv2i32_v2i64:
8855 case AArch64::UABALv2i32_v2i64:
8856 case AArch64::SABALv4i32_v2i64:
8857 return AArch64::ADDv2i64;
8858 case AArch64::UABALv4i16_v4i32:
8859 case AArch64::SABALv4i16_v4i32:
8860 case AArch64::SABALv8i16_v4i32:
8861 case AArch64::SABAv4i32:
8862 case AArch64::UABAv4i32:
8863 return AArch64::ADDv4i32;
8864 case AArch64::UABALv4i32_v2i64:
8865 return AArch64::ADDv2i64;
8866 case AArch64::UABALv8i16_v4i32:
8867 return AArch64::ADDv4i32;
8868 case AArch64::UABALv8i8_v8i16:
8869 case AArch64::SABALv16i8_v8i16:
8870 return AArch64::ADDv8i16;
8871 case AArch64::UABAv16i8:
8872 case AArch64::SABAv16i8:
8873 return AArch64::ADDv16i8;
8874 case AArch64::UABAv4i16:
8875 case AArch64::SABAv4i16:
8876 return AArch64::ADDv4i16;
8877 case AArch64::UABAv2i32:
8878 case AArch64::SABAv2i32:
8879 return AArch64::ADDv2i32;
8880 case AArch64::UABAv8i8:
8881 case AArch64::SABAv8i8:
8882 return AArch64::ADDv8i8;
8883 default:
8884 llvm_unreachable("Unknown accumulator opcode");
8885 }
8886}
8887
8888/// When getMachineCombinerPatterns() finds potential patterns,
8889/// this function generates the instructions that could replace the
8890/// original code sequence.
///
/// \param Root Instruction at the root of the matched pattern. It is queued in
///   DelInstrs whenever an arm of the switch falls through to the common tail.
/// \param Pattern Combiner pattern selecting which rewrite to emit. Values not
///   handled here are forwarded to
///   TargetInstrInfo::genAlternativeCodeSequence().
/// \param InsInstrs Receives the newly built instructions.
/// \param DelInstrs Receives the instructions that the new sequence replaces.
/// \param InstrIdxForVirtReg Updated with an entry (index 0) for every virtual
///   register that is created directly in this function.
///
/// NOTE(review): this listing omits a number of source lines (the embedded
/// line numbers skip), including part of the parameter list and the `case`
/// labels / `if` conditions that introduce most arms of the switch below.
8891void AArch64InstrInfo::genAlternativeCodeSequence(
8892 MachineInstr &Root, unsigned Pattern,
8895 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8896 MachineBasicBlock &MBB = *Root.getParent();
8897 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8898 MachineFunction &MF = *MBB.getParent();
8899 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8900
 // Multiply instruction folded into the new sequence (when an arm sets it);
 // a non-null value is queued for deletion after the switch.
8901 MachineInstr *MUL = nullptr;
8902 const TargetRegisterClass *RC;
8903 unsigned Opc;
8904 switch (Pattern) {
8905 default:
8906 // Reassociate instructions.
8907 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8908 DelInstrs, InstrIdxForVirtReg);
8909 return;
8911 // A - (B + C)
8912 // ==> (A - B) - C
8913 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8914 InstrIdxForVirtReg);
8915 return;
8917 // A - (B + C)
8918 // ==> (A - C) - B
8919 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8920 InstrIdxForVirtReg);
8921 return;
8924 // MUL I=A,B,0
8925 // ADD R,I,C
8926 // ==> MADD R,A,B,C
8927 // --- Create(MADD);
8929 Opc = AArch64::MADDWrrr;
8930 RC = &AArch64::GPR32RegClass;
8931 } else {
8932 Opc = AArch64::MADDXrrr;
8933 RC = &AArch64::GPR64RegClass;
8934 }
8935 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8936 break;
8939 // MUL I=A,B,0
8940 // ADD R,C,I
8941 // ==> MADD R,A,B,C
8942 // --- Create(MADD);
8944 Opc = AArch64::MADDWrrr;
8945 RC = &AArch64::GPR32RegClass;
8946 } else {
8947 Opc = AArch64::MADDXrrr;
8948 RC = &AArch64::GPR64RegClass;
8949 }
8950 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8951 break;
8956 // MUL I=A,B,0
8957 // ADD/SUB R,I,Imm
8958 // ==> MOV V, Imm/-Imm
8959 // ==> MADD R,A,B,V
8960 // --- Create(MADD);
 // NOTE(review): this RC shadows the function-level RC declared above, and
 // each branch below assigns it twice; only the second value
 // (GPR32RegClass / GPR64RegClass) is ever read.
8961 const TargetRegisterClass *RC;
8962 unsigned BitSize, MovImm;
8965 MovImm = AArch64::MOVi32imm;
8966 RC = &AArch64::GPR32spRegClass;
8967 BitSize = 32;
8968 Opc = AArch64::MADDWrrr;
8969 RC = &AArch64::GPR32RegClass;
8970 } else {
8971 MovImm = AArch64::MOVi64imm;
8972 RC = &AArch64::GPR64spRegClass;
8973 BitSize = 64;
8974 Opc = AArch64::MADDXrrr;
8975 RC = &AArch64::GPR64RegClass;
8976 }
8977 Register NewVR = MRI.createVirtualRegister(RC);
8978 uint64_t Imm = Root.getOperand(2).getImm();
8979
 // Apply the shift amount carried in operand 3, when it is an immediate.
8980 if (Root.getOperand(3).isImm()) {
8981 unsigned Val = Root.getOperand(3).getImm();
8982 Imm = Imm << Val;
8983 }
8984 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8986 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8987 // Check that the immediate can be composed via a single instruction.
8989 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
 // Bail out; this arm has not yet added anything to InsInstrs or DelInstrs.
8990 if (Insn.size() != 1)
8991 return;
8992 MachineInstrBuilder MIB1 =
8993 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8994 .addImm(IsSub ? -Imm : Imm);
8995 InsInstrs.push_back(MIB1);
 // NOTE(review): index 0 presumably identifies MIB1's slot in InsInstrs —
 // confirm against the combiner's use of InstrIdxForVirtReg.
8996 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8997 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8998 break;
8999 }
9002 // MUL I=A,B,0
9003 // SUB R,I, C
9004 // ==> SUB V, 0, C
9005 // ==> MADD R,A,B,V // = -C + A*B
9006 // --- Create(MADD);
9007 const TargetRegisterClass *SubRC;
9008 unsigned SubOpc, ZeroReg;
9010 SubOpc = AArch64::SUBWrr;
9011 SubRC = &AArch64::GPR32spRegClass;
9012 ZeroReg = AArch64::WZR;
9013 Opc = AArch64::MADDWrrr;
9014 RC = &AArch64::GPR32RegClass;
9015 } else {
9016 SubOpc = AArch64::SUBXrr;
9017 SubRC = &AArch64::GPR64spRegClass;
9018 ZeroReg = AArch64::XZR;
9019 Opc = AArch64::MADDXrrr;
9020 RC = &AArch64::GPR64RegClass;
9021 }
9022 Register NewVR = MRI.createVirtualRegister(SubRC);
9023 // SUB NewVR, 0, C
9024 MachineInstrBuilder MIB1 =
9025 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
9026 .addReg(ZeroReg)
9027 .add(Root.getOperand(2));
9028 InsInstrs.push_back(MIB1);
9029 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9030 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
9031 break;
9032 }
9035 // MUL I=A,B,0
9036 // SUB R,C,I
9037 // ==> MSUB R,A,B,C (computes C - A*B)
9038 // --- Create(MSUB);
9040 Opc = AArch64::MSUBWrrr;
9041 RC = &AArch64::GPR32RegClass;
9042 } else {
9043 Opc = AArch64::MSUBXrrr;
9044 RC = &AArch64::GPR64RegClass;
9045 }
9046 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9047 break;
9049 Opc = AArch64::MLAv8i8;
9050 RC = &AArch64::FPR64RegClass;
9051 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9052 break;
9054 Opc = AArch64::MLAv8i8;
9055 RC = &AArch64::FPR64RegClass;
9056 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9057 break;
9059 Opc = AArch64::MLAv16i8;
9060 RC = &AArch64::FPR128RegClass;
9061 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9062 break;
9064 Opc = AArch64::MLAv16i8;
9065 RC = &AArch64::FPR128RegClass;
9066 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9067 break;
9069 Opc = AArch64::MLAv4i16;
9070 RC = &AArch64::FPR64RegClass;
9071 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9072 break;
9074 Opc = AArch64::MLAv4i16;
9075 RC = &AArch64::FPR64RegClass;
9076 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9077 break;
9079 Opc = AArch64::MLAv8i16;
9080 RC = &AArch64::FPR128RegClass;
9081 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9082 break;
9084 Opc = AArch64::MLAv8i16;
9085 RC = &AArch64::FPR128RegClass;
9086 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9087 break;
9089 Opc = AArch64::MLAv2i32;
9090 RC = &AArch64::FPR64RegClass;
9091 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9092 break;
9094 Opc = AArch64::MLAv2i32;
9095 RC = &AArch64::FPR64RegClass;
9096 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9097 break;
9099 Opc = AArch64::MLAv4i32;
9100 RC = &AArch64::FPR128RegClass;
9101 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9102 break;
9104 Opc = AArch64::MLAv4i32;
9105 RC = &AArch64::FPR128RegClass;
9106 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9107 break;
9108
9110 Opc = AArch64::MLAv8i8;
9111 RC = &AArch64::FPR64RegClass;
9112 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9113 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
9114 RC);
9115 break;
9117 Opc = AArch64::MLSv8i8;
9118 RC = &AArch64::FPR64RegClass;
9119 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9120 break;
9122 Opc = AArch64::MLAv16i8;
9123 RC = &AArch64::FPR128RegClass;
9124 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9125 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
9126 RC);
9127 break;
9129 Opc = AArch64::MLSv16i8;
9130 RC = &AArch64::FPR128RegClass;
9131 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9132 break;
9134 Opc = AArch64::MLAv4i16;
9135 RC = &AArch64::FPR64RegClass;
9136 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9137 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9138 RC);
9139 break;
9141 Opc = AArch64::MLSv4i16;
9142 RC = &AArch64::FPR64RegClass;
9143 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9144 break;
9146 Opc = AArch64::MLAv8i16;
9147 RC = &AArch64::FPR128RegClass;
9148 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9149 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9150 RC);
9151 break;
9153 Opc = AArch64::MLSv8i16;
9154 RC = &AArch64::FPR128RegClass;
9155 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9156 break;
9158 Opc = AArch64::MLAv2i32;
9159 RC = &AArch64::FPR64RegClass;
9160 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9161 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9162 RC);
9163 break;
9165 Opc = AArch64::MLSv2i32;
9166 RC = &AArch64::FPR64RegClass;
9167 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9168 break;
9170 Opc = AArch64::MLAv4i32;
9171 RC = &AArch64::FPR128RegClass;
9172 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9173 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9174 RC);
9175 break;
9177 Opc = AArch64::MLSv4i32;
9178 RC = &AArch64::FPR128RegClass;
9179 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9180 break;
9181
9183 Opc = AArch64::MLAv4i16_indexed;
9184 RC = &AArch64::FPR64RegClass;
9185 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9186 break;
9188 Opc = AArch64::MLAv4i16_indexed;
9189 RC = &AArch64::FPR64RegClass;
9190 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9191 break;
9193 Opc = AArch64::MLAv8i16_indexed;
9194 RC = &AArch64::FPR128RegClass;
9195 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9196 break;
9198 Opc = AArch64::MLAv8i16_indexed;
9199 RC = &AArch64::FPR128RegClass;
9200 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9201 break;
9203 Opc = AArch64::MLAv2i32_indexed;
9204 RC = &AArch64::FPR64RegClass;
9205 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9206 break;
9208 Opc = AArch64::MLAv2i32_indexed;
9209 RC = &AArch64::FPR64RegClass;
9210 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9211 break;
9213 Opc = AArch64::MLAv4i32_indexed;
9214 RC = &AArch64::FPR128RegClass;
9215 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9216 break;
9218 Opc = AArch64::MLAv4i32_indexed;
9219 RC = &AArch64::FPR128RegClass;
9220 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9221 break;
9222
9224 Opc = AArch64::MLAv4i16_indexed;
9225 RC = &AArch64::FPR64RegClass;
9226 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9227 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9228 RC);
9229 break;
9231 Opc = AArch64::MLSv4i16_indexed;
9232 RC = &AArch64::FPR64RegClass;
9233 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9234 break;
9236 Opc = AArch64::MLAv8i16_indexed;
9237 RC = &AArch64::FPR128RegClass;
9238 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9239 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9240 RC);
9241 break;
9243 Opc = AArch64::MLSv8i16_indexed;
9244 RC = &AArch64::FPR128RegClass;
9245 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9246 break;
9248 Opc = AArch64::MLAv2i32_indexed;
9249 RC = &AArch64::FPR64RegClass;
9250 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9251 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9252 RC);
9253 break;
9255 Opc = AArch64::MLSv2i32_indexed;
9256 RC = &AArch64::FPR64RegClass;
9257 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9258 break;
9260 Opc = AArch64::MLAv4i32_indexed;
9261 RC = &AArch64::FPR128RegClass;
9262 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9263 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9264 RC);
9265 break;
9267 Opc = AArch64::MLSv4i32_indexed;
9268 RC = &AArch64::FPR128RegClass;
9269 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9270 break;
9271
9272 // Floating Point Support
9274 Opc = AArch64::FMADDHrrr;
9275 RC = &AArch64::FPR16RegClass;
9276 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9277 break;
9279 Opc = AArch64::FMADDSrrr;
9280 RC = &AArch64::FPR32RegClass;
9281 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9282 break;
9284 Opc = AArch64::FMADDDrrr;
9285 RC = &AArch64::FPR64RegClass;
9286 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9287 break;
9288
9290 Opc = AArch64::FMADDHrrr;
9291 RC = &AArch64::FPR16RegClass;
9292 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9293 break;
9295 Opc = AArch64::FMADDSrrr;
9296 RC = &AArch64::FPR32RegClass;
9297 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9298 break;
9300 Opc = AArch64::FMADDDrrr;
9301 RC = &AArch64::FPR64RegClass;
9302 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9303 break;
9304
9306 Opc = AArch64::FMLAv1i32_indexed;
9307 RC = &AArch64::FPR32RegClass;
9308 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9310 break;
9312 Opc = AArch64::FMLAv1i32_indexed;
9313 RC = &AArch64::FPR32RegClass;
9314 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9316 break;
9317
9319 Opc = AArch64::FMLAv1i64_indexed;
9320 RC = &AArch64::FPR64RegClass;
9321 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9323 break;
9325 Opc = AArch64::FMLAv1i64_indexed;
9326 RC = &AArch64::FPR64RegClass;
9327 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9329 break;
9330
9332 RC = &AArch64::FPR64RegClass;
9333 Opc = AArch64::FMLAv4i16_indexed;
9334 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9336 break;
9338 RC = &AArch64::FPR64RegClass;
9339 Opc = AArch64::FMLAv4f16;
9340 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9342 break;
9344 RC = &AArch64::FPR64RegClass;
9345 Opc = AArch64::FMLAv4i16_indexed;
9346 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9348 break;
9350 RC = &AArch64::FPR64RegClass;
9351 Opc = AArch64::FMLAv4f16;
9352 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9354 break;
9355
9358 RC = &AArch64::FPR64RegClass;
9360 Opc = AArch64::FMLAv2i32_indexed;
9361 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9363 } else {
9364 Opc = AArch64::FMLAv2f32;
9365 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9367 }
9368 break;
9371 RC = &AArch64::FPR64RegClass;
9373 Opc = AArch64::FMLAv2i32_indexed;
9374 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9376 } else {
9377 Opc = AArch64::FMLAv2f32;
9378 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9380 }
9381 break;
9382
9384 RC = &AArch64::FPR128RegClass;
9385 Opc = AArch64::FMLAv8i16_indexed;
9386 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9388 break;
9390 RC = &AArch64::FPR128RegClass;
9391 Opc = AArch64::FMLAv8f16;
9392 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9394 break;
9396 RC = &AArch64::FPR128RegClass;
9397 Opc = AArch64::FMLAv8i16_indexed;
9398 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9400 break;
9402 RC = &AArch64::FPR128RegClass;
9403 Opc = AArch64::FMLAv8f16;
9404 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9406 break;
9407
9410 RC = &AArch64::FPR128RegClass;
9412 Opc = AArch64::FMLAv2i64_indexed;
9413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9415 } else {
9416 Opc = AArch64::FMLAv2f64;
9417 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9419 }
9420 break;
9423 RC = &AArch64::FPR128RegClass;
9425 Opc = AArch64::FMLAv2i64_indexed;
9426 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9428 } else {
9429 Opc = AArch64::FMLAv2f64;
9430 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9432 }
9433 break;
9434
9437 RC = &AArch64::FPR128RegClass;
9439 Opc = AArch64::FMLAv4i32_indexed;
9440 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9442 } else {
9443 Opc = AArch64::FMLAv4f32;
9444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9446 }
9447 break;
9448
9451 RC = &AArch64::FPR128RegClass;
9453 Opc = AArch64::FMLAv4i32_indexed;
9454 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9456 } else {
9457 Opc = AArch64::FMLAv4f32;
9458 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9460 }
9461 break;
9462
9464 Opc = AArch64::FNMSUBHrrr;
9465 RC = &AArch64::FPR16RegClass;
9466 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9467 break;
9469 Opc = AArch64::FNMSUBSrrr;
9470 RC = &AArch64::FPR32RegClass;
9471 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9472 break;
9474 Opc = AArch64::FNMSUBDrrr;
9475 RC = &AArch64::FPR64RegClass;
9476 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9477 break;
9478
9480 Opc = AArch64::FNMADDHrrr;
9481 RC = &AArch64::FPR16RegClass;
9482 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9483 break;
9485 Opc = AArch64::FNMADDSrrr;
9486 RC = &AArch64::FPR32RegClass;
9487 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9488 break;
9490 Opc = AArch64::FNMADDDrrr;
9491 RC = &AArch64::FPR64RegClass;
9492 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9493 break;
9494
9496 Opc = AArch64::FMSUBHrrr;
9497 RC = &AArch64::FPR16RegClass;
9498 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9499 break;
9501 Opc = AArch64::FMSUBSrrr;
9502 RC = &AArch64::FPR32RegClass;
9503 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9504 break;
9506 Opc = AArch64::FMSUBDrrr;
9507 RC = &AArch64::FPR64RegClass;
9508 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9509 break;
9510
9512 Opc = AArch64::FMLSv1i32_indexed;
9513 RC = &AArch64::FPR32RegClass;
9514 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9516 break;
9517
9519 Opc = AArch64::FMLSv1i64_indexed;
9520 RC = &AArch64::FPR64RegClass;
9521 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9523 break;
9524
9527 RC = &AArch64::FPR64RegClass;
9528 Register NewVR = MRI.createVirtualRegister(RC);
9529 MachineInstrBuilder MIB1 =
9530 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9531 .add(Root.getOperand(2));
9532 InsInstrs.push_back(MIB1);
9533 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9535 Opc = AArch64::FMLAv4f16;
9536 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9537 FMAInstKind::Accumulator, &NewVR);
9538 } else {
9539 Opc = AArch64::FMLAv4i16_indexed;
9540 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9541 FMAInstKind::Indexed, &NewVR);
9542 }
9543 break;
9544 }
9546 RC = &AArch64::FPR64RegClass;
9547 Opc = AArch64::FMLSv4f16;
9548 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9550 break;
9552 RC = &AArch64::FPR64RegClass;
9553 Opc = AArch64::FMLSv4i16_indexed;
9554 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9556 break;
9557
9560 RC = &AArch64::FPR64RegClass;
9562 Opc = AArch64::FMLSv2i32_indexed;
9563 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9565 } else {
9566 Opc = AArch64::FMLSv2f32;
9567 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9569 }
9570 break;
9571
9574 RC = &AArch64::FPR128RegClass;
9575 Register NewVR = MRI.createVirtualRegister(RC);
9576 MachineInstrBuilder MIB1 =
9577 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9578 .add(Root.getOperand(2));
9579 InsInstrs.push_back(MIB1);
9580 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9582 Opc = AArch64::FMLAv8f16;
9583 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9584 FMAInstKind::Accumulator, &NewVR);
9585 } else {
9586 Opc = AArch64::FMLAv8i16_indexed;
9587 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9588 FMAInstKind::Indexed, &NewVR);
9589 }
9590 break;
9591 }
9593 RC = &AArch64::FPR128RegClass;
9594 Opc = AArch64::FMLSv8f16;
9595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9597 break;
9599 RC = &AArch64::FPR128RegClass;
9600 Opc = AArch64::FMLSv8i16_indexed;
9601 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9603 break;
9604
9607 RC = &AArch64::FPR128RegClass;
9609 Opc = AArch64::FMLSv2i64_indexed;
9610 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9612 } else {
9613 Opc = AArch64::FMLSv2f64;
9614 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9616 }
9617 break;
9618
9621 RC = &AArch64::FPR128RegClass;
9623 Opc = AArch64::FMLSv4i32_indexed;
9624 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9626 } else {
9627 Opc = AArch64::FMLSv4f32;
9628 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9630 }
9631 break;
9634 RC = &AArch64::FPR64RegClass;
9635 Register NewVR = MRI.createVirtualRegister(RC);
9636 MachineInstrBuilder MIB1 =
9637 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9638 .add(Root.getOperand(2));
9639 InsInstrs.push_back(MIB1);
9640 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9642 Opc = AArch64::FMLAv2i32_indexed;
9643 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9644 FMAInstKind::Indexed, &NewVR);
9645 } else {
9646 Opc = AArch64::FMLAv2f32;
9647 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9648 FMAInstKind::Accumulator, &NewVR);
9649 }
9650 break;
9651 }
9654 RC = &AArch64::FPR128RegClass;
9655 Register NewVR = MRI.createVirtualRegister(RC);
9656 MachineInstrBuilder MIB1 =
9657 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9658 .add(Root.getOperand(2));
9659 InsInstrs.push_back(MIB1);
9660 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9662 Opc = AArch64::FMLAv4i32_indexed;
9663 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9664 FMAInstKind::Indexed, &NewVR);
9665 } else {
9666 Opc = AArch64::FMLAv4f32;
9667 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9668 FMAInstKind::Accumulator, &NewVR);
9669 }
9670 break;
9671 }
9674 RC = &AArch64::FPR128RegClass;
9675 Register NewVR = MRI.createVirtualRegister(RC);
9676 MachineInstrBuilder MIB1 =
9677 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9678 .add(Root.getOperand(2));
9679 InsInstrs.push_back(MIB1);
9680 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9682 Opc = AArch64::FMLAv2i64_indexed;
9683 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9684 FMAInstKind::Indexed, &NewVR);
9685 } else {
9686 Opc = AArch64::FMLAv2f64;
9687 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9688 FMAInstKind::Accumulator, &NewVR);
9689 }
9690 break;
9691 }
9694 unsigned IdxDupOp =
9696 : 2;
9697 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9698 &AArch64::FPR128RegClass, MRI);
9699 break;
9700 }
9703 unsigned IdxDupOp =
9705 : 2;
9706 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9707 &AArch64::FPR128RegClass, MRI);
9708 break;
9709 }
9712 unsigned IdxDupOp =
9714 : 2;
9715 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9716 &AArch64::FPR128_loRegClass, MRI);
9717 break;
9718 }
9721 unsigned IdxDupOp =
9723 : 2;
9724 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9725 &AArch64::FPR128RegClass, MRI);
9726 break;
9727 }
9730 unsigned IdxDupOp =
9732 : 2;
9733 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9734 &AArch64::FPR128_loRegClass, MRI);
9735 break;
9736 }
9738 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9739 break;
9740 }
9742 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9743 Pattern, 4);
9744 break;
9745 }
9747 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9748 Pattern, 8);
9749 break;
9750 }
9752 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9753 Pattern, 16);
9754 break;
9755 }
9756
9757 } // end switch (Pattern)
 // Common tail: reached only by arms that `break`; arms that `return` above
 // skip the deletion bookkeeping and the flag propagation below.
9758 // Record MUL and ADD/SUB for deletion
9759 if (MUL)
9760 DelInstrs.push_back(MUL);
9761 DelInstrs.push_back(&Root);
9762
9763 // Set the flags on the inserted instructions to be the merged flags of the
9764 // instructions that we have combined.
9765 uint32_t Flags = Root.getFlags();
9766 if (MUL)
9767 Flags = Root.mergeFlagsWith(*MUL);
9768 for (auto *MI : InsInstrs)
9769 MI->setFlags(Flags);
9770}
9771
9772/// Replace a csinc-branch sequence by a simple conditional branch.
9773///
9774/// Examples:
9775/// 1. \code
9776/// csinc w9, wzr, wzr, <condition code>
9777/// tbnz w9, #0, 0x44
9778/// \endcode
9779/// to
9780/// \code
9781/// b.<inverted condition code>
9782/// \endcode
9783///
9784/// 2. \code
9785/// csinc w9, wzr, wzr, <condition code>
9786/// tbz w9, #0, 0x44
9787/// \endcode
9788/// to
9789/// \code
9790/// b.<condition code>
9791/// \endcode
9792///
9793/// Replace a compare and branch sequence by a TBZ/TBNZ instruction when the
9794/// compare's constant operand is a power of 2.
9795///
9796/// Examples:
9797/// \code
9798/// and w8, w8, #0x400
9799/// cbnz w8, L1
9800/// \endcode
9801/// to
9802/// \code
9803/// tbnz w8, #10, L1
9804/// \endcode
9805///
9806/// \param MI Conditional Branch
9807/// \return True when the simple conditional branch is generated
9808///
// NOTE(review): the line carrying this function's signature (listing line
// 9809) is missing from this extract; the body below refers to its parameter
// as MI.
9810 bool IsNegativeBranch = false;
9811 bool IsTestAndBranch = false;
 // Operand index of the branch-target block within MI; set per opcode below.
9812 unsigned TargetBBInMI = 0;
9813 switch (MI.getOpcode()) {
9814 default:
9815 llvm_unreachable("Unknown branch instruction?");
 // These branch forms are never rewritten by this function.
9816 case AArch64::Bcc:
9817 case AArch64::CBWPri:
9818 case AArch64::CBXPri:
9819 case AArch64::CBBAssertExt:
9820 case AArch64::CBHAssertExt:
9821 case AArch64::CBWPrr:
9822 case AArch64::CBXPrr:
9823 return false;
9824 case AArch64::CBZW:
9825 case AArch64::CBZX:
9826 TargetBBInMI = 1;
9827 break;
9828 case AArch64::CBNZW:
9829 case AArch64::CBNZX:
9830 TargetBBInMI = 1;
9831 IsNegativeBranch = true;
9832 break;
9833 case AArch64::TBZW:
9834 case AArch64::TBZX:
9835 TargetBBInMI = 2;
9836 IsTestAndBranch = true;
9837 break;
9838 case AArch64::TBNZW:
9839 case AArch64::TBNZX:
9840 TargetBBInMI = 2;
9841 IsNegativeBranch = true;
9842 IsTestAndBranch = true;
9843 break;
9844 }
9845 // So we increment a zero register and test for bits other
9846 // than bit 0? Conservatively bail out in case the verifier
9847 // missed this case.
9848 if (IsTestAndBranch && MI.getOperand(1).getImm())
9849 return false;
9850
9851 // Find Definition.
9852 assert(MI.getParent() && "Incomplete machine instruction\n");
9853 MachineBasicBlock *MBB = MI.getParent();
9854 MachineFunction *MF = MBB->getParent();
9855 MachineRegisterInfo *MRI = &MF->getRegInfo();
9856 Register VReg = MI.getOperand(0).getReg();
9857 if (!VReg.isVirtual())
9858 return false;
9859
9860 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9861
9862 // Look through COPY instructions to find definition.
 // Each copied-from register must have exactly one non-debug use and one
 // definition, otherwise the walk is abandoned.
9863 while (DefMI->isCopy()) {
9864 Register CopyVReg = DefMI->getOperand(1).getReg();
9865 if (!MRI->hasOneNonDBGUse(CopyVReg))
9866 return false;
9867 if (!MRI->hasOneDef(CopyVReg))
9868 return false;
9869 DefMI = MRI->getVRegDef(CopyVReg);
9870 }
9871
9872 switch (DefMI->getOpcode()) {
9873 default:
9874 return false;
9875 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9876 case AArch64::ANDWri:
9877 case AArch64::ANDXri: {
9878 if (IsTestAndBranch)
9879 return false;
9880 if (DefMI->getParent() != MBB)
9881 return false;
9882 if (!MRI->hasOneNonDBGUse(VReg))
9883 return false;
9884
9885 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
 // NOTE(review): the line that declares Mask and begins the call whose
 // arguments follow (listing line 9886) is missing from this extract.
9887 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9888 if (!isPowerOf2_64(Mask))
9889 return false;
9890
9891 MachineOperand &MO = DefMI->getOperand(1);
9892 Register NewReg = MO.getReg();
9893 if (!NewReg.isVirtual())
9894 return false;
9895
9896 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9897
9898 MachineBasicBlock &RefToMBB = *MBB;
 // IsTestAndBranch is false on this path, so MI is one of CBZ*/CBNZ* and its
 // target block is operand 1 (the TargetBBInMI value assigned above).
9899 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9900 DebugLoc DL = MI.getDebugLoc();
 // Mask is a power of two here, so Imm is the index of its single set bit.
9901 unsigned Imm = Log2_64(Mask);
9902 unsigned Opc = (Imm < 32)
9903 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9904 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9905 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9906 .addReg(NewReg)
9907 .addImm(Imm)
9908 .addMBB(TBB);
9909 // Register lives on to the CBZ now.
9910 MO.setIsKill(false);
9911
9912 // For immediate smaller than 32, we need to use the 32-bit
9913 // variant (W) in all cases. Indeed the 64-bit variant does not
9914 // allow to encode them.
9915 // Therefore, if the input register is 64-bit, we need to take the
9916 // 32-bit sub-part.
9917 if (!Is32Bit && Imm < 32)
9918 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9919 MI.eraseFromParent();
9920 return true;
9921 }
9922 // Look for CSINC
9923 case AArch64::CSINCWr:
9924 case AArch64::CSINCXr: {
 // Both source operands must be the zero register (WZR/WZR or XZR/XZR).
9925 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9926 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9927 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9928 DefMI->getOperand(2).getReg() == AArch64::XZR))
9929 return false;
9930
 // Give up if the CSINC has an NZCV def operand that findRegisterDefOperandIdx
 // reports (third argument `true`).
9931 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9932 true) != -1)
9933 return false;
9934
9935 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9936 // Convert only when the condition code is not modified between
9937 // the CSINC and the branch. The CC may be used by other
9938 // instructions in between.
 // NOTE(review): the `if` condition guarding this return (listing line 9939)
 // is missing from this extract.
9940 return false;
9941 MachineBasicBlock &RefToMBB = *MBB;
9942 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9943 DebugLoc DL = MI.getDebugLoc();
 // NOTE(review): the statement controlled by this `if` (listing line 9945) is
 // missing from this extract; as listed, the BuildMI below would wrongly
 // appear to be conditional.
9944 if (IsNegativeBranch)
9946 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9947 MI.eraseFromParent();
9948 return true;
9949 }
9950 }
9951}
9952
/// Split the target-flag word \p TF into two parts: the bits covered by
/// AArch64II::MO_FRAGMENT (returned first) and all remaining bits (returned
/// second).
9953std::pair<unsigned, unsigned>
9954AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9955 const unsigned FragmentMask = AArch64II::MO_FRAGMENT;
9956 return {TF & FragmentMask, TF & ~FragmentMask};
9957}
9958
// Returns an ArrayRef over a function-local static table that pairs each of
// the AArch64II flags MO_PAGE, MO_PAGEOFF, MO_G3..MO_G0 and MO_HI12 with its
// textual name.
// NOTE(review): the line holding this function's return type (listing line
// 9959) is missing from this extract.
9960AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9961 using namespace AArch64II;
9962
9963 static const std::pair<unsigned, const char *> TargetFlags[] = {
9964 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9965 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9966 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9967 {MO_HI12, "aarch64-hi12"}};
9968 return ArrayRef(TargetFlags);
9969}
9970
// Returns an ArrayRef over a function-local static table that pairs each of
// the AArch64II flags listed below with its textual name.
// NOTE(review): the line holding this function's return type (listing line
// 9971) is missing from this extract.
9972AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9973 using namespace AArch64II;
9974
9975 static const std::pair<unsigned, const char *> TargetFlags[] = {
9976 {MO_COFFSTUB, "aarch64-coffstub"},
9977 {MO_GOT, "aarch64-got"},
9978 {MO_NC, "aarch64-nc"},
9979 {MO_S, "aarch64-s"},
9980 {MO_TLS, "aarch64-tls"},
9981 {MO_DLLIMPORT, "aarch64-dllimport"},
9982 {MO_PREL, "aarch64-prel"},
9983 {MO_TAGGED, "aarch64-tagged"},
9984 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9985 };
9986 return ArrayRef(TargetFlags);
9987}
9988
// Returns an ArrayRef over a function-local static table that pairs the
// MachineMemOperand flags MOSuppressPair and MOStridedAccess with their
// textual names.
// NOTE(review): the line holding this function's return type (listing line
// 9989) is missing from this extract.
9990AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9991 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9992 {{MOSuppressPair, "aarch64-suppress-pair"},
9993 {MOStridedAccess, "aarch64-strided-access"}};
9994 return ArrayRef(TargetFlags);
9995}
9996
// NOTE(review): the line that opens this enum (listing line 10077) is missing
// from this extract; only the doc comment and the enumerators are visible.
// In each diagram below the left column is the original sequence, the middle
// column is what replaces it at the call site, and the right column is the
// body of the outlined function.
9997/// Constants defining how certain sequences should be outlined.
9998/// This encompasses how an outlined function should be called, and what kind of
9999/// frame should be emitted for that outlined function.
10000///
10001/// \p MachineOutlinerDefault implies that the function should be called with
10002/// a save and restore of LR to the stack.
10003///
10004/// That is,
10005///
10006/// I1     Save LR                    OUTLINED_FUNCTION:
10007/// I2 --> BL OUTLINED_FUNCTION       I1
10008/// I3     Restore LR                 I2
10009///                                   I3
10010///                                   RET
10011///
10012/// * Call construction overhead: 3 (save + BL + restore)
10013/// * Frame construction overhead: 1 (ret)
10014/// * Requires stack fixups? Yes
10015///
10016/// \p MachineOutlinerTailCall implies that the function is being created from
10017/// a sequence of instructions ending in a return.
10018///
10019/// That is,
10020///
10021/// I1                                OUTLINED_FUNCTION:
10022/// I2 --> B OUTLINED_FUNCTION        I1
10023/// RET                               I2
10024///                                   RET
10025///
10026/// * Call construction overhead: 1 (B)
10027/// * Frame construction overhead: 0 (Return included in sequence)
10028/// * Requires stack fixups? No
10029///
10030/// \p MachineOutlinerNoLRSave implies that the function should be called using
10031/// a BL instruction, but doesn't require LR to be saved and restored. This
10032/// happens when LR is known to be dead.
10033///
10034/// That is,
10035///
10036/// I1                                OUTLINED_FUNCTION:
10037/// I2 --> BL OUTLINED_FUNCTION       I1
10038/// I3                                I2
10039///                                   I3
10040///                                   RET
10041///
10042/// * Call construction overhead: 1 (BL)
10043/// * Frame construction overhead: 1 (RET)
10044/// * Requires stack fixups? No
10045///
10046/// \p MachineOutlinerThunk implies that the function is being created from
10047/// a sequence of instructions ending in a call. The outlined function is
10048/// called with a BL instruction, and the outlined function tail-calls the
10049/// original call destination.
10050///
10051/// That is,
10052///
10053/// I1                                OUTLINED_FUNCTION:
10054/// I2 --> BL OUTLINED_FUNCTION       I1
10055/// BL f                              I2
10056///                                   B f
10057/// * Call construction overhead: 1 (BL)
10058/// * Frame construction overhead: 0
10059/// * Requires stack fixups? No
10060///
10061/// \p MachineOutlinerRegSave implies that the function should be called with a
10062/// save and restore of LR to an available register. This allows us to avoid
10063/// stack fixups. Note that this outlining variant is compatible with the
10064/// NoLRSave case.
10065///
10066/// That is,
10067///
10068/// I1     Save LR                    OUTLINED_FUNCTION:
10069/// I2 --> BL OUTLINED_FUNCTION       I1
10070/// I3     Restore LR                 I2
10071///                                   I3
10072///                                   RET
10073///
10074/// * Call construction overhead: 3 (save + BL + restore)
10075/// * Frame construction overhead: 1 (ret)
10076/// * Requires stack fixups? No
10078 MachineOutlinerDefault, ///< Emit a save, restore, call, and return.
10079 MachineOutlinerTailCall, ///< Only emit a branch.
10080 MachineOutlinerNoLRSave, ///< Emit a call and return.
10081 MachineOutlinerThunk, ///< Emit a call and tail-call.
10082 MachineOutlinerRegSave ///< Same as default, but save to a register.
10083};
10084
10090
10092AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
10093 MachineFunction *MF = C.getMF();
10094 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
10095 const AArch64RegisterInfo *ARI =
10096 static_cast<const AArch64RegisterInfo *>(&TRI);
10097 // Check if there is an available register across the sequence that we can
10098 // use.
10099 for (unsigned Reg : AArch64::GPR64RegClass) {
10100 if (!ARI->isReservedReg(*MF, Reg) &&
10101 Reg != AArch64::LR && // LR is not reserved, but don't use it.
10102 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
10103 Reg != AArch64::X17 && // Ditto for X17.
10104 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
10105 C.isAvailableInsideSeq(Reg, TRI))
10106 return Reg;
10107 }
10108 return Register();
10109}
10110
10111static bool
10113 const outliner::Candidate &b) {
10114 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
10115 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
10116
10117 return MFIa->getSignReturnAddressCondition() ==
10119}
10120
10121static bool
10123 const outliner::Candidate &b) {
10124 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
10125 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
10126
10127 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
10128}
10129
10131 const outliner::Candidate &b) {
10132 const AArch64Subtarget &SubtargetA =
10134 const AArch64Subtarget &SubtargetB =
10135 b.getMF()->getSubtarget<AArch64Subtarget>();
10136 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
10137}
10138
/// Decide whether, and how, the repeated instruction sequence described by
/// \p RepeatedSequenceLocs can be outlined.
///
/// The function picks a frame kind for the outlined function (default, tail
/// call, no-LR-save or thunk), assigns a call kind and call size in bytes to
/// the candidates through setCallInfo(), and accumulates the byte cost of
/// constructing the outlined frame. Candidates that cannot be outlined safely
/// are removed from \p RepeatedSequenceLocs along the way.
///
/// \param MMI Not referenced by the code visible in this listing.
/// \param RepeatedSequenceLocs Occurrences of the sequence; may be pruned.
/// \param MinRepeats Minimum number of candidates that must remain.
/// \returns std::nullopt when the sequence is rejected or fewer than
/// \p MinRepeats candidates remain; otherwise an OutlinedFunction built from
/// the remaining candidates, the sequence size, the frame cost and the frame
/// kind.
10139std::optional<std::unique_ptr<outliner::OutlinedFunction>>
10140AArch64InstrInfo::getOutliningCandidateInfo(
10141 const MachineModuleInfo &MMI,
10142 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
10143 unsigned MinRepeats) const {
 // Size in bytes of one occurrence, measured on the first candidate.
10144 unsigned SequenceSize = 0;
10145 for (auto &MI : RepeatedSequenceLocs[0])
10146 SequenceSize += getInstSizeInBytes(MI);
10147
10148 unsigned NumBytesToCreateFrame = 0;
10149
10150 // Avoid splitting ADRP ADD/LDR pair into outlined functions.
10151 // These instructions are fused together by the scheduler.
10152 // Any candidate where ADRP is the last instruction should be rejected
10153 // as that will lead to splitting ADRP pair.
10154 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
10155 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
10156 if (LastMI.getOpcode() == AArch64::ADRP &&
10157 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
10158 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
10159 return std::nullopt;
10160 }
10161
10162 // Similarly any candidate where the first instruction is ADD/LDR with a
10163 // page offset should be rejected to avoid ADRP splitting.
10164 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
10165 FirstMI.getOpcode() == AArch64::LDRXui) &&
10166 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
10167 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
10168 return std::nullopt;
10169 }
10170
10171 // We only allow outlining for functions having exactly matching return
10172 // address signing attributes, i.e., all share the same value for the
10173 // attribute "sign-return-address" and all share the same type of key they
10174 // are signed with.
10175 // Additionally we require all functions to simultaneously either support
10176 // v8.3a features or not. Otherwise an outlined function could get signed
10177 // using dedicated v8.3 instructions and a call from a function that doesn't
10178 // support v8.3 instructions would therefore be invalid.
10179 if (std::adjacent_find(
10180 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
10181 [](const outliner::Candidate &a, const outliner::Candidate &b) {
10182 // Return true if a and b are non-equal w.r.t. return address
10183 // signing or support of v8.3a features
10184 if (outliningCandidatesSigningScopeConsensus(a, b) &&
10185 outliningCandidatesSigningKeyConsensus(a, b) &&
10186 outliningCandidatesV8_3OpsConsensus(a, b)) {
10187 return false;
10188 }
10189 return true;
10190 }) != RepeatedSequenceLocs.end()) {
10191 return std::nullopt;
10192 }
10193
10194 // Since at this point all candidates agree on their return address signing
10195 // picking just one is fine. If the candidate functions potentially sign their
10196 // return addresses, the outlined function should do the same. Note that in
10197 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
10198 // not certainly true that the outlined function will have to sign its return
10199 // address but this decision is made later, when the decision to outline
10200 // has already been made.
10201 // The same holds for the number of additional instructions we need: On
10202 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
10203 // necessary. However, at this point we don't know if the outlined function
10204 // will have a RET instruction so we assume the worst.
10205 const TargetRegisterInfo &TRI = getRegisterInfo();
10206 // Performing a tail call may require extra checks when PAuth is enabled.
10207 // If PAuth is disabled, set it to zero for uniformity.
10208 unsigned NumBytesToCheckLRInTCEpilogue = 0;
10209 const auto RASignCondition = RepeatedSequenceLocs[0]
10210 .getMF()
10211 ->getInfo<AArch64FunctionInfo>()
10212 ->getSignReturnAddressCondition();
10213 if (RASignCondition != SignReturnAddress::None) {
10214 // One PAC and one AUT instruction
10215 NumBytesToCreateFrame += 8;
10216
10217 // PAuth is enabled - set extra tail call cost, if any.
10218 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
10219 *RepeatedSequenceLocs[0].getMF());
 // NOTE(review): the right-hand side of the assignment below is absent from
 // this listing (embedded line 10221 is skipped); presumably it derives the
 // size from LRCheckMethod. Restore it from the upstream file.
10220 NumBytesToCheckLRInTCEpilogue =
10222 // Checking the authenticated LR value may significantly impact
10223 // SequenceSize, so account for it for more precise results.
10224 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
10225 SequenceSize += NumBytesToCheckLRInTCEpilogue;
10226
10227 // We have to check if sp modifying instructions would get outlined.
10228 // If so we only allow outlining if sp is unchanged overall, so matching
10229 // sub and add instructions are okay to outline, all other sp modifications
10230 // are not
10231 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
 // Net change applied to SP by the ADD/SUB immediates seen so far.
10232 int SPValue = 0;
10233 for (auto &MI : C) {
10234 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
10235 switch (MI.getOpcode()) {
10236 case AArch64::ADDXri:
10237 case AArch64::ADDWri:
10238 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10239 assert(MI.getOperand(2).isImm() &&
10240 "Expected operand to be immediate");
10241 assert(MI.getOperand(1).isReg() &&
10242 "Expected operand to be a register");
10243 // Check if the add just increments sp. If so, we search for
10244 // matching sub instructions that decrement sp. If not, the
10245 // modification is illegal
10246 if (MI.getOperand(1).getReg() == AArch64::SP)
10247 SPValue += MI.getOperand(2).getImm();
10248 else
10249 return true;
10250 break;
10251 case AArch64::SUBXri:
10252 case AArch64::SUBWri:
10253 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10254 assert(MI.getOperand(2).isImm() &&
10255 "Expected operand to be immediate");
10256 assert(MI.getOperand(1).isReg() &&
10257 "Expected operand to be a register");
10258 // Check if the sub just decrements sp. If so, we search for
10259 // matching add instructions that increment sp. If not, the
10260 // modification is illegal
10261 if (MI.getOperand(1).getReg() == AArch64::SP)
10262 SPValue -= MI.getOperand(2).getImm();
10263 else
10264 return true;
10265 break;
10266 default:
10267 return true;
10268 }
10269 }
10270 }
 // A non-zero balance means SP is left changed by the sequence.
10271 if (SPValue)
10272 return true;
10273 return false;
10274 };
10275 // Remove candidates with illegal stack modifying instructions
10276 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
10277
10278 // If the sequence doesn't have enough candidates left, then we're done.
10279 if (RepeatedSequenceLocs.size() < MinRepeats)
10280 return std::nullopt;
10281 }
10282
10283 // Properties about candidate MBBs that hold for all of them.
10284 unsigned FlagsSetInAll = 0xF;
10285
10286 // Intersect the flags of all candidates to obtain FlagsSetInAll.
10287 for (outliner::Candidate &C : RepeatedSequenceLocs)
10288 FlagsSetInAll &= C.Flags;
10289
10290 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
10291
10292 // Helper lambda which sets call information for every candidate.
10293 auto SetCandidateCallInfo =
10294 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
10295 for (outliner::Candidate &C : RepeatedSequenceLocs)
10296 C.setCallInfo(CallID, NumBytesForCall);
10297 };
10298
 // Start from the default frame kind; the branches below may replace it.
10299 unsigned FrameID = MachineOutlinerDefault;
10300 NumBytesToCreateFrame += 4;
10301
10302 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
10303 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
10304 });
10305
10306 // We check to see if CFI Instructions are present, and if they are
10307 // we find the number of CFI Instructions in the candidates.
10308 unsigned CFICount = 0;
10309 for (auto &I : RepeatedSequenceLocs[0]) {
10310 if (I.isCFIInstruction())
10311 CFICount++;
10312 }
10313
10314 // We compare the number of found CFI Instructions to the number of CFI
10315 // instructions in the parent function for each candidate. We must check this
10316 // since if we outline one of the CFI instructions in a function, we have to
10317 // outline them all for correctness. If we do not, the address offsets will be
10318 // incorrect between the two sections of the program.
10319 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10320 std::vector<MCCFIInstruction> CFIInstructions =
10321 C.getMF()->getFrameInstructions();
10322
10323 if (CFICount > 0 && CFICount != CFIInstructions.size())
10324 return std::nullopt;
10325 }
10326
10327 // Returns true if an instruction is safe to fix up, false otherwise.
10328 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10329 if (MI.isCall())
10330 return true;
10331
10332 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10333 !MI.readsRegister(AArch64::SP, &TRI))
10334 return true;
10335
10336 // Any modification of SP will break our code to save/restore LR.
10337 // FIXME: We could handle some instructions which add a constant
10338 // offset to SP, with a bit more work.
10339 if (MI.modifiesRegister(AArch64::SP, &TRI))
10340 return false;
10341
10342 // At this point, we have a stack instruction that we might need to
10343 // fix up. We'll handle it if it's a load or store.
10344 if (MI.mayLoadOrStore()) {
10345 const MachineOperand *Base; // Filled with the base operand of MI.
10346 int64_t Offset; // Filled with the offset of MI.
10347 bool OffsetIsScalable;
10348
10349 // Does it allow us to offset the base operand and is the base the
10350 // register SP?
10351 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10352 !Base->isReg() || Base->getReg() != AArch64::SP)
10353 return false;
10354
10355 // Fix-up code below assumes bytes.
10356 if (OffsetIsScalable)
10357 return false;
10358
10359 // Find the minimum/maximum offset for this instruction and check
10360 // if fixing it up would be in range.
10361 int64_t MinOffset,
10362 MaxOffset; // Unscaled offsets for the instruction.
10363 // The scale to multiply the offsets by.
10364 TypeSize Scale(0U, false), DummyWidth(0U, false);
10365 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10366
10367 Offset += 16; // Update the offset to what it would be if we outlined.
10368 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10369 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10370 return false;
10371
10372 // It's in range, so we can outline it.
10373 return true;
10374 }
10375
10376 // FIXME: Add handling for instructions like "add x0, sp, #8".
10377
10378 // We can't fix it up, so don't outline it.
10379 return false;
10380 };
10381
10382 // True if it's possible to fix up each stack instruction in this sequence.
10383 // Important for frames/call variants that modify the stack.
10384 bool AllStackInstrsSafe =
10385 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10386
10387 // If the last instruction in any candidate is a terminator, then we should
10388 // tail call all of the candidates.
10389 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10390 FrameID = MachineOutlinerTailCall;
10391 NumBytesToCreateFrame = 0;
10392 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10393 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10394 }
10395
10396 else if (LastInstrOpcode == AArch64::BL ||
10397 ((LastInstrOpcode == AArch64::BLR ||
10398 LastInstrOpcode == AArch64::BLRNoIP) &&
10399 !HasBTI)) {
10400 // FIXME: Do we need to check if the code after this uses the value of LR?
10401 FrameID = MachineOutlinerThunk;
10402 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10403 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10404 }
10405
10406 else {
10407 // We need to decide how to emit calls + frames. We can always emit the same
10408 // frame if we don't need to save to the stack. If we have to save to the
10409 // stack, then we need a different frame.
10410 unsigned NumBytesNoStackCalls = 0;
10411 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10412
10413 // Check if we have to save LR.
10414 for (outliner::Candidate &C : RepeatedSequenceLocs) {
 // NOTE(review): the condition of the conditional expression below is
 // absent from this listing (embedded line 10416 is skipped). Restore it
 // from the upstream file.
10415 bool LRAvailable =
10417 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10418 : true;
10419 // If we have a noreturn caller, then we're going to be conservative and
10420 // say that we have to save LR. If we don't have a ret at the end of the
10421 // block, then we can't reason about liveness accurately.
10422 //
10423 // FIXME: We can probably do better than always disabling this in
10424 // noreturn functions by fixing up the liveness info.
10425 bool IsNoReturn =
10426 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10427
10428 // Is LR available? If so, we don't need a save.
10429 if (LRAvailable && !IsNoReturn) {
10430 NumBytesNoStackCalls += 4;
10431 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10432 CandidatesWithoutStackFixups.push_back(C);
10433 }
10434
10435 // Is an unused register available? If so, we won't modify the stack, so
10436 // we can outline with the same frame type as those that don't save LR.
10437 else if (findRegisterToSaveLRTo(C)) {
10438 NumBytesNoStackCalls += 12;
10439 C.setCallInfo(MachineOutlinerRegSave, 12);
10440 CandidatesWithoutStackFixups.push_back(C);
10441 }
10442
10443 // Is SP used in the sequence at all? If not, we don't have to modify
10444 // the stack, so we are guaranteed to get the same frame.
10445 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10446 NumBytesNoStackCalls += 12;
10447 C.setCallInfo(MachineOutlinerDefault, 12);
10448 CandidatesWithoutStackFixups.push_back(C);
10449 }
10450
10451 // If we outline this, we need to modify the stack. Pretend we don't
10452 // outline this by saving all of its bytes.
10453 else {
10454 NumBytesNoStackCalls += SequenceSize;
10455 }
10456 }
10457
10458 // If there are no places where we have to save LR, then note that we
10459 // don't have to update the stack. Otherwise, give every candidate the
10460 // default call type, as long as it's safe to do so.
10461 if (!AllStackInstrsSafe ||
10462 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10463 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10464 FrameID = MachineOutlinerNoLRSave;
10465 if (RepeatedSequenceLocs.size() < MinRepeats)
10466 return std::nullopt;
10467 } else {
10468 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10469
10470 // Bugzilla ID: 46767
10471 // TODO: Check if fixing up the stack more than once is safe so we can
10472 // outline these.
10473 //
10474 // An outline resulting in a caller that requires stack fixups at the
10475 // callsite to a callee that also requires stack fixups can happen when
10476 // there are no available registers at the candidate callsite for a
10477 // candidate that itself also has calls.
10478 //
10479 // In other words if function_containing_sequence in the following pseudo
10480 // assembly requires that we save LR at the point of the call, but there
10481 // are no available registers: in this case we save using SP and as a
10482 // result the SP offsets requires stack fixups by multiples of 16.
10483 //
10484 // function_containing_sequence:
10485 // ...
10486 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10487 // call OUTLINED_FUNCTION_N
10488 // restore LR from SP
10489 // ...
10490 //
10491 // OUTLINED_FUNCTION_N:
10492 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10493 // ...
10494 // bl foo
10495 // restore LR from SP
10496 // ret
10497 //
10498 // Because the code to handle more than one stack fixup does not
10499 // currently have the proper checks for legality, these cases will assert
10500 // in the AArch64 MachineOutliner. This is because the code to do this
10501 // needs more hardening, testing, better checks that generated code is
10502 // legal, etc and because it is only verified to handle a single pass of
10503 // stack fixup.
10504 //
10505 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10506 // these cases until they are known to be handled. Bugzilla 46767 is
10507 // referenced in comments at the assert site.
10508 //
10509 // To avoid asserting (or generating non-legal code on noassert builds)
10510 // we remove all candidates which would need more than one stack fixup by
10511 // pruning the cases where the candidate has calls while also having no
10512 // available LR and having no available general purpose registers to copy
10513 // LR to (ie one extra stack save/restore).
10514 //
10515 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10516 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10517 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10518 return (llvm::any_of(C, IsCall)) &&
10519 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10520 !findRegisterToSaveLRTo(C));
10521 });
10522 }
10523 }
10524
10525 // If we dropped all of the candidates, bail out here.
10526 if (RepeatedSequenceLocs.size() < MinRepeats)
10527 return std::nullopt;
10528 }
10529
10530 // Does every candidate's MBB contain a call? If so, then we might have a call
10531 // in the range.
10532 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10533 // Check if the range contains a call. These require a save + restore of the
10534 // link register.
10535 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10536 bool ModStackToSaveLR = false;
10537 if (any_of(drop_end(FirstCand),
10538 [](const MachineInstr &MI) { return MI.isCall(); }))
10539 ModStackToSaveLR = true;
10540
10541 // Handle the last instruction separately. If this is a tail call, then the
10542 // last instruction is a call. We don't want to save + restore in this case.
10543 // However, it could be possible that the last instruction is a call without
10544 // it being valid to tail call this sequence. We should consider this as
10545 // well.
10546 else if (FrameID != MachineOutlinerThunk &&
10547 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10548 ModStackToSaveLR = true;
10549
10550 if (ModStackToSaveLR) {
10551 // We can't fix up the stack. Bail out.
10552 if (!AllStackInstrsSafe)
10553 return std::nullopt;
10554
10555 // Save + restore LR.
10556 NumBytesToCreateFrame += 8;
10557 }
10558 }
10559
10560 // If we have CFI instructions, we can only outline if the outlined section
10561 // can be a tail call
10562 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10563 return std::nullopt;
10564
10565 return std::make_unique<outliner::OutlinedFunction>(
10566 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10567}
10568
10569void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10570 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10571 // If a bunch of candidates reach this point they must agree on their return
10572 // address signing. It is therefore enough to just consider the signing
10573 // behaviour of one of them
10574 const auto &CFn = Candidates.front().getMF()->getFunction();
10575
10576 if (CFn.hasFnAttribute("ptrauth-returns"))
10577 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10578 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10579 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10580 // Since all candidates belong to the same module, just copy the
10581 // function-level attributes of an arbitrary function.
10582 if (CFn.hasFnAttribute("sign-return-address"))
10583 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10584 if (CFn.hasFnAttribute("sign-return-address-key"))
10585 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10586
10587 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10588}
10589
/// Returns true if instructions may be outlined from \p MF.
///
/// The visible checks reject linkonce_odr functions (unless
/// \p OutlineFromLinkOnceODRs is set), functions with an explicit section,
/// functions with no AArch64FunctionInfo or whose red-zone status is true or
/// unknown, and functions with streaming-mode changes.
10590bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10591 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10592 const Function &F = MF.getFunction();
10593
10594 // Can F be deduplicated by the linker? If it can, don't outline from it.
10595 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10596 return false;
10597
10598 // Don't outline from functions with section markings; the program could
10599 // expect that all the code is in the named section.
10600 // FIXME: Allow outlining from multiple functions with the same section
10601 // marking.
10602 if (F.hasSection())
10603 return false;
10604
10605 // Outlining from functions with redzones is unsafe since the outliner may
10606 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10607 // outline from it.
10608 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10609 if (!AFI || AFI->hasRedZone().value_or(true))
10610 return false;
10611
10612 // FIXME: Determine whether it is safe to outline from functions which contain
10613 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10614 // outlined together and ensure it is safe to outline with async unwind info,
10615 // required for saving & restoring VG around calls.
10616 if (AFI->hasStreamingModeChanges())
10617 return false;
10618
10619 // FIXME: Teach the outliner to generate/handle Windows unwind info.
 // NOTE(review): the condition guarding the return below is absent from this
 // listing (embedded line 10620 is skipped); as written here the return is
 // unconditional. Restore the guard from the upstream file.
10621 return false;
10622
10623 // It's safe to outline from MF.
10624 return true;
10625}
10626
/// Partition \p MBB into ranges of instructions that may be outlined from.
///
/// The block is walked bottom-up while tracking register-unit liveness. A
/// range is a maximal run of instructions at which W16, W17 and NZCV are all
/// available; only runs longer than one instruction that do not split a
/// bundle are kept. The ranges are returned in top-down order.
///
/// NOTE(review): this listing is missing several lines of the function (the
/// embedded numbering skips 10627, 10630, 10632, 10671 and 10743): the return
/// type, the head of an assertion, the head of the Ranges declaration and the
/// statements of two conditionals that presumably update \p Flags. Restore
/// them from the upstream file.
10628AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10629 unsigned &Flags) const {
10631 "Must track liveness!");
10633 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10634 Ranges;
10635 // According to the AArch64 Procedure Call Standard, the following are
10636 // undefined on entry/exit from a function call:
10637 //
10638 // * Registers x16, x17, (and thus w16, w17)
10639 // * Condition codes (and thus the NZCV register)
10640 //
10641 // If any of these registers are used inside or live across an outlined
10642 // function, then they may be modified later, either by the compiler or
10643 // some other tool (like the linker).
10644 //
10645 // To avoid outlining in these situations, partition each block into ranges
10646 // where these registers are dead. We will only outline from those ranges.
10647 LiveRegUnits LRU(getRegisterInfo());
10648 auto AreAllUnsafeRegsDead = [&LRU]() {
10649 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10650 LRU.available(AArch64::NZCV);
10651 };
10652
10653 // We need to know if LR is live across an outlining boundary later on in
10654 // order to decide how we'll create the outlined call, frame, etc.
10655 //
10656 // It's pretty expensive to check this for *every candidate* within a block.
10657 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10658 // to compute liveness from the end of the block for O(n) candidates within
10659 // the block.
10660 //
10661 // So, to improve the average case, let's keep track of liveness from the end
10662 // of the block to the beginning of *every outlinable range*. If we know that
10663 // LR is available in every range we could outline from, then we know that
10664 // we don't need to check liveness for any candidate within that range.
10665 bool LRAvailableEverywhere = true;
10666 // Compute liveness bottom-up.
10667 LRU.addLiveOuts(MBB);
10668 // Update flags that require info about the entire MBB.
10669 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10670 if (MI.isCall() && !MI.isTerminator())
10672 };
10673 // Range: [RangeBegin, RangeEnd)
10674 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10675 unsigned RangeLen;
 // Starts an empty range whose exclusive end is the instruction after
 // NewBegin; RangeBegin is moved upwards as the range grows.
10676 auto CreateNewRangeStartingAt =
10677 [&RangeBegin, &RangeEnd,
10678 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10679 RangeBegin = NewBegin;
10680 RangeEnd = std::next(RangeBegin);
10681 RangeLen = 0;
10682 };
10683 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10684 // At least one unsafe register is not dead. We do not want to outline at
10685 // this point. If it is long enough to outline from and does not cross a
10686 // bundle boundary, save the range [RangeBegin, RangeEnd).
10687 if (RangeLen <= 1)
10688 return;
10689 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10690 return;
10691 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10692 return;
10693 Ranges.emplace_back(RangeBegin, RangeEnd);
10694 };
10695 // Find the first point where all unsafe registers are dead.
10696 // FIND: <safe instr> <-- end of first potential range
10697 // SKIP: <unsafe def>
10698 // SKIP: ... everything between ...
10699 // SKIP: <unsafe use>
10700 auto FirstPossibleEndPt = MBB.instr_rbegin();
10701 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10702 if (!FirstPossibleEndPt->isDebugInstr())
10703 LRU.stepBackward(*FirstPossibleEndPt);
10704 // Update flags that impact how we outline across the entire block,
10705 // regardless of safety.
10706 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10707 if (AreAllUnsafeRegsDead())
10708 break;
10709 }
10710 // If we exhausted the entire block, we have no safe ranges to outline.
10711 if (FirstPossibleEndPt == MBB.instr_rend())
10712 return Ranges;
10713 // Current range.
10714 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10715 // FirstPossibleEndPt points to the first place where all unsafe registers
10716 // are dead (if there is any such point). Begin partitioning the MBB into
10717 // ranges.
10718 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10719 if (!MI.isDebugInstr())
10720 LRU.stepBackward(MI);
10721 UpdateWholeMBBFlags(MI);
10722 if (!AreAllUnsafeRegsDead()) {
10723 SaveRangeIfNonEmpty();
10724 CreateNewRangeStartingAt(MI.getIterator());
10725 continue;
10726 }
10727 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10728 RangeBegin = MI.getIterator();
10729 ++RangeLen;
10730 }
10731 // Above loop misses the last (or only) range. If we are still safe, then
10732 // let's save the range.
10733 if (AreAllUnsafeRegsDead())
10734 SaveRangeIfNonEmpty();
10735 if (Ranges.empty())
10736 return Ranges;
10737 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
10738 // the order.
10739 std::reverse(Ranges.begin(), Ranges.end());
10740 // If there is at least one outlinable range where LR is unavailable
10741 // somewhere, remember that.
10742 if (!LRAvailableEverywhere)
10744 return Ranges;
10745}
10746
10748AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10750 unsigned Flags) const {
10751 MachineInstr &MI = *MIT;
10752
10753 // Don't outline anything used for return address signing. The outlined
10754 // function will get signed later if needed
10755 switch (MI.getOpcode()) {
10756 case AArch64::PACM:
10757 case AArch64::PACIASP:
10758 case AArch64::PACIBSP:
10759 case AArch64::PACIASPPC:
10760 case AArch64::PACIBSPPC:
10761 case AArch64::AUTIASP:
10762 case AArch64::AUTIBSP:
10763 case AArch64::AUTIASPPCi:
10764 case AArch64::AUTIASPPCr:
10765 case AArch64::AUTIBSPPCi:
10766 case AArch64::AUTIBSPPCr:
10767 case AArch64::RETAA:
10768 case AArch64::RETAB:
10769 case AArch64::RETAASPPCi:
10770 case AArch64::RETAASPPCr:
10771 case AArch64::RETABSPPCi:
10772 case AArch64::RETABSPPCr:
10773 case AArch64::EMITBKEY:
10774 case AArch64::PAUTH_PROLOGUE:
10775 case AArch64::PAUTH_EPILOGUE:
10777 }
10778
10779 // We can only outline these if we will tail call the outlined function, or
10780 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10781 // in a tail call.
10782 //
10783 // FIXME: If the proper fixups for the offset are implemented, this should be
10784 // possible.
10785 if (MI.isCFIInstruction())
10787
10788 // Is this a terminator for a basic block?
10789 if (MI.isTerminator())
10790 // TargetInstrInfo::getOutliningType has already filtered out anything
10791 // that would break this, so we can allow it here.
10793
10794 // Make sure none of the operands are un-outlinable.
10795 for (const MachineOperand &MOP : MI.operands()) {
10796 // A check preventing CFI indices was here before, but only CFI
10797 // instructions should have those.
10798 assert(!MOP.isCFIIndex());
10799
10800 // If it uses LR or W30 explicitly, then don't touch it.
10801 if (MOP.isReg() && !MOP.isImplicit() &&
10802 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10804 }
10805
10806 // Special cases for instructions that can always be outlined, but will fail
10807 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
10808 // be outlined because they don't require a *specific* value to be in LR.
10809 if (MI.getOpcode() == AArch64::ADRP)
10811
10812 // If MI is a call we might be able to outline it. We don't want to outline
10813 // any calls that rely on the position of items on the stack. When we outline
10814 // something containing a call, we have to emit a save and restore of LR in
10815 // the outlined function. Currently, this always happens by saving LR to the
10816 // stack. Thus, if we outline, say, half the parameters for a function call
10817 // plus the call, then we'll break the callee's expectations for the layout
10818 // of the stack.
10819 //
10820 // FIXME: Allow calls to functions which construct a stack frame, as long
10821 // as they don't access arguments on the stack.
10822 // FIXME: Figure out some way to analyze functions defined in other modules.
10823 // We should be able to compute the memory usage based on the IR calling
10824 // convention, even if we can't see the definition.
10825 if (MI.isCall()) {
10826 // Get the function associated with the call. Look at each operand and find
10827 // the one that represents the callee and get its name.
10828 const Function *Callee = nullptr;
10829 for (const MachineOperand &MOP : MI.operands()) {
10830 if (MOP.isGlobal()) {
10831 Callee = dyn_cast<Function>(MOP.getGlobal());
10832 break;
10833 }
10834 }
10835
10836 // Never outline calls to mcount. There isn't any rule that would require
10837 // this, but the Linux kernel's "ftrace" feature depends on it.
10838 if (Callee && Callee->getName() == "\01_mcount")
10840
10841 // If we don't know anything about the callee, assume it depends on the
10842 // stack layout of the caller. In that case, it's only legal to outline
10843 // as a tail-call. Explicitly list the call instructions we know about so we
10844 // don't get unexpected results with call pseudo-instructions.
10845 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10846 if (MI.getOpcode() == AArch64::BLR ||
10847 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10848 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10849
10850 if (!Callee)
10851 return UnknownCallOutlineType;
10852
10853 // We have a function we have information about. Check if it's something
10854 // we can safely outline.
10855 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10856
10857 // We don't know what's going on with the callee at all. Don't touch it.
10858 if (!CalleeMF)
10859 return UnknownCallOutlineType;
10860
10861 // Check if we know anything about the callee saves on the function. If we
10862 // don't, then don't touch it, since that implies that we haven't
10863 // computed anything about its stack frame yet.
10864 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10865 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10866 MFI.getNumObjects() > 0)
10867 return UnknownCallOutlineType;
10868
10869 // At this point, we can say that CalleeMF ought to not pass anything on the
10870 // stack. Therefore, we can outline it.
10872 }
10873
10874 // Don't touch the link register or W30.
10875 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10876 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10878
10879 // Don't outline BTI instructions, because that will prevent the outlining
10880 // site from being indirectly callable.
10881 if (hasBTISemantics(MI))
10883
10885}
10886
10887void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10888 for (MachineInstr &MI : MBB) {
10889 const MachineOperand *Base;
10890 TypeSize Width(0, false);
10891 int64_t Offset;
10892 bool OffsetIsScalable;
10893
10894 // Is this a load or store with an immediate offset with SP as the base?
10895 if (!MI.mayLoadOrStore() ||
10896 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10897 &RI) ||
10898 (Base->isReg() && Base->getReg() != AArch64::SP))
10899 continue;
10900
10901 // It is, so we have to fix it up.
10902 TypeSize Scale(0U, false);
10903 int64_t Dummy1, Dummy2;
10904
10905 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10906 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10907 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10908 assert(Scale != 0 && "Unexpected opcode!");
10909 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10910
10911 // We've pushed the return address to the stack, so add 16 to the offset.
10912 // This is safe, since we already checked if it would overflow when we
10913 // checked if this instruction was legal to outline.
10914 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10915 StackOffsetOperand.setImm(NewImm);
10916 }
10917}
10918
// NOTE(review): the embedded listing numbers jump from 10918 to 10920, so the
// opening line of this definition is not present here. buildOutlinedFrame in
// this file calls signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr),
// which presumably is this helper -- confirm against the upstream source.
//
// Does nothing unless ShouldSignReturnAddr is set. Otherwise a PAUTH_PROLOGUE
// pseudo is built at the beginning of MBB and TII is asked to create the
// corresponding epilogue instruction.
10920 const AArch64InstrInfo *TII,
10921 bool ShouldSignReturnAddr) {
10922 if (!ShouldSignReturnAddr)
10923 return;
10924
10925 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
// NOTE(review): listing line 10926 (the rest of the BuildMI statement above)
// is absent from this listing.
10927 TII->createPauthEpilogueInstr(MBB, DebugLoc());
10928}
10929
/// Complete the body of an outlined function, whose instructions live in MBB,
/// according to OF.FrameConstructionID.
///
/// Steps visible below, in order:
///  - record the outlining style on the function info ("Tail Call", "Thunk" or
///    "Function");
///  - for thunks, build a tail-call from the trailing BL/BLR/BLRNoIP;
///  - if the block contains a call that is not also a return, fix up stack
///    offsets, make LR live-in, and wrap the body in an STRXpre/LDRXpost pair
///    that saves and restores LR (with CFI when unwind info is needed);
///  - sign the return address when the function's signing condition asks for
///    it;
///  - for non-tail-call styles, append a RET and, for MachineOutlinerDefault,
///    fix up stack offsets.
// NOTE(review): listing line 10931 (the leading parameters, presumably the MBB
// and MF used below) is absent from this listing.
10930void AArch64InstrInfo::buildOutlinedFrame(
10932 const outliner::OutlinedFunction &OF) const {
10933
10934 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10935
10936 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10937 FI->setOutliningStyle("Tail Call");
10938 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10939 // For thunk outlining, rewrite the last instruction from a call to a
10940 // tail-call.
10941 MachineInstr *Call = &*--MBB.instr_end();
10942 unsigned TailOpcode;
10943 if (Call->getOpcode() == AArch64::BL) {
10944 TailOpcode = AArch64::TCRETURNdi;
10945 } else {
10946 assert(Call->getOpcode() == AArch64::BLR ||
10947 Call->getOpcode() == AArch64::BLRNoIP);
10948 TailOpcode = AArch64::TCRETURNriALL;
10949 }
// The tail call reuses the original call's first operand as its target.
10950 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10951 .add(Call->getOperand(0))
10952 .addImm(0);
10953 MBB.insert(MBB.end(), TC);
// NOTE(review): listing line 10954 is absent here; presumably it removes the
// original call that TC replaces -- confirm against the upstream source.
10955
10956 FI->setOutliningStyle("Thunk");
10957 }
10958
10959 bool IsLeafFunction = true;
10960
10961 // Is there a call in the outlined range?
10962 auto IsNonTailCall = [](const MachineInstr &MI) {
10963 return MI.isCall() && !MI.isReturn();
10964 };
10965
10966 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10967 // Fix up the instructions in the range, since we're going to modify the
10968 // stack.
10969
10970 // Bugzilla ID: 46767
10971 // TODO: Check if fixing up twice is safe so we can outline these.
10972 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10973 "Can only fix up stack references once");
10974 fixupPostOutline(MBB);
10975
10976 IsLeafFunction = false;
10977
10978 // LR has to be a live in so that we can save it.
10979 if (!MBB.isLiveIn(AArch64::LR))
10980 MBB.addLiveIn(AArch64::LR);
10981
// NOTE(review): listing lines 10982-10983 are absent; the iterators `It` and
// `Et` used below are presumably declared there (start and end of the block).
10984
// For tail-call and thunk styles the restore must go before the final
// (tail-call) instruction rather than at the very end of the block.
10985 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10986 OF.FrameConstructionID == MachineOutlinerThunk)
10987 Et = std::prev(MBB.end());
10988
10989 // Insert a save before the outlined region
10990 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10991 .addReg(AArch64::SP, RegState::Define)
10992 .addReg(AArch64::LR)
10993 .addReg(AArch64::SP)
10994 .addImm(-16);
10995 It = MBB.insert(It, STRXpre);
10996
10997 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10998 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10999
11000 // Add a CFI saying the stack was moved 16 B down.
11001 CFIBuilder.buildDefCFAOffset(16);
11002
11003 // Add a CFI saying that the LR that we want to find is now 16 B higher
11004 // than before.
11005 CFIBuilder.buildOffset(AArch64::LR, -16);
11006 }
11007
11008 // Insert a restore before the terminator for the function.
11009 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
11010 .addReg(AArch64::SP, RegState::Define)
11011 .addReg(AArch64::LR, RegState::Define)
11012 .addReg(AArch64::SP)
11013 .addImm(16);
11014 Et = MBB.insert(Et, LDRXpost);
11015 }
11016
// The signing decision takes the function's condition plus whether the
// outlined body is a non-leaf (i.e. contained a non-tail call).
11017 auto RASignCondition = FI->getSignReturnAddressCondition();
11018 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
11019 RASignCondition, !IsLeafFunction);
11020
11021 // If this is a tail call outlined function, then there's already a return.
11022 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
11023 OF.FrameConstructionID == MachineOutlinerThunk) {
11024 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
11025 return;
11026 }
11027
11028 // It's not a tail call, so we have to insert the return ourselves.
11029
11030 // LR has to be a live in so that we can return to it.
11031 if (!MBB.isLiveIn(AArch64::LR))
11032 MBB.addLiveIn(AArch64::LR);
11033
11034 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
11035 .addReg(AArch64::LR);
11036 MBB.insert(MBB.end(), ret);
11037
11038 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
11039
11040 FI->setOutliningStyle("Function");
11041
11042 // Did we have to modify the stack by saving the link register?
11043 if (OF.FrameConstructionID != MachineOutlinerDefault)
11044 return;
11045
11046 // We modified the stack.
11047 // Walk over the basic block and fix up all the stack accesses.
11048 fixupPostOutline(MBB);
11049}
11050
/// Insert a call to the outlined function MF at It in MBB and return an
/// iterator to the inserted call instruction.
///
/// The shape depends on C.CallConstructionID:
///  - MachineOutlinerTailCall: a single TCRETURNdi to the function;
///  - MachineOutlinerNoLRSave / MachineOutlinerThunk: a single BL;
///  - MachineOutlinerRegSave: BL bracketed by ORRXrs moves that stash LR in a
///    register found by findRegisterToSaveLRTo() and restore it afterwards;
///  - anything else: BL bracketed by an STRXpre/LDRXpost pair that spills LR
///    to the stack.
// NOTE(review): listing lines 11052-11053 (the parameter list, presumably
// declaring M, MBB, It, MF and C as used below) are absent from this listing.
11051MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
11054
11055 // Are we tail calling?
11056 if (C.CallConstructionID == MachineOutlinerTailCall) {
11057 // If yes, then we can just branch to the label.
11058 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
11059 .addGlobalAddress(M.getNamedValue(MF.getName()))
11060 .addImm(0));
11061 return It;
11062 }
11063
11064 // Are we saving the link register?
11065 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
11066 C.CallConstructionID == MachineOutlinerThunk) {
11067 // No, so just insert the call.
11068 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
11069 .addGlobalAddress(M.getNamedValue(MF.getName())));
11070 return It;
11071 }
11072
11073 // We want to return the spot where we inserted the call.
// NOTE(review): listing line 11074 is absent; `CallPt`, assigned and returned
// below, is presumably declared there.
11075
11076 // Instructions for saving and restoring LR around the call instruction we're
11077 // going to insert.
11078 MachineInstr *Save;
11079 MachineInstr *Restore;
11080 // Can we save to a register?
11081 if (C.CallConstructionID == MachineOutlinerRegSave) {
11082 // FIXME: This logic should be sunk into a target-specific interface so that
11083 // we don't have to recompute the register.
11084 Register Reg = findRegisterToSaveLRTo(C);
11085 assert(Reg && "No callee-saved register available?");
11086
11087 // LR has to be a live in so that we can save it.
11088 if (!MBB.isLiveIn(AArch64::LR))
11089 MBB.addLiveIn(AArch64::LR);
11090
11091 // Save and restore LR from Reg.
11092 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
11093 .addReg(AArch64::XZR)
11094 .addReg(AArch64::LR)
11095 .addImm(0);
11096 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
11097 .addReg(AArch64::XZR)
11098 .addReg(Reg)
11099 .addImm(0);
11100 } else {
11101 // We have the default case. Save and restore from SP.
11102 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
11103 .addReg(AArch64::SP, RegState::Define)
11104 .addReg(AArch64::LR)
11105 .addReg(AArch64::SP)
11106 .addImm(-16);
11107 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
11108 .addReg(AArch64::SP, RegState::Define)
11109 .addReg(AArch64::LR, RegState::Define)
11110 .addReg(AArch64::SP)
11111 .addImm(16);
11112 }
11113
// Emit the sequence Save, BL, Restore; each insert returns an iterator to the
// new instruction, so step past it before inserting the next one.
11114 It = MBB.insert(It, Save);
11115 It++;
11116
11117 // Insert the call.
11118 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
11119 .addGlobalAddress(M.getNamedValue(MF.getName())));
11120 CallPt = It;
11121 It++;
11122
11123 It = MBB.insert(It, Restore);
11124 return CallPt;
11125}
11126
11127bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
11128 MachineFunction &MF) const {
11129 return MF.getFunction().hasMinSize();
11130}
11131
/// Emit, at Iter in MBB, an instruction that zeroes Reg.
///
/// The instruction chosen depends on the register kind and subtarget:
///  - general-purpose register: MOVZXi with a zero immediate;
///  - SVE or streaming SVE available: DUP_ZI_D with a zero immediate;
///  - NEON available: MOVIv2d_ns with a zero immediate;
///  - otherwise: FMOVD0 on the dsub sub-register of Reg.
// NOTE(review): listing line 11133 (presumably the `Iter` parameter) is absent
// from this listing. AllowSideEffects is not read in the visible body.
11132void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
11134 DebugLoc &DL,
11135 bool AllowSideEffects) const {
11136 const MachineFunction &MF = *MBB.getParent();
11137 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
11138 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
11139
11140 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
11141 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
11142 } else if (STI.isSVEorStreamingSVEAvailable()) {
11143 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
11144 .addImm(0)
11145 .addImm(0);
11146 } else if (STI.isNeonAvailable()) {
11147 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
11148 .addImm(0);
11149 } else {
11150 // This is a streaming-compatible function without SVE. We don't have full
11151 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
11152 // So given `movi v..` would be illegal use `fmov d..` instead.
11153 assert(STI.hasNEON() && "Expected to have NEON.");
11154 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
11155 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
11156 }
11157}
11158
// NOTE(review): listing line 11160 (the declarator with the function name and
// the MI parameter) is absent from this listing; presumably this is the
// target's copy-instruction recognizer -- confirm against the upstream source.
//
// Returns the {destination, source} operand pair when MI is a register move
// spelled as an ORR with the zero register, and std::nullopt otherwise.
// 32-bit forms are only accepted when they cannot be acting as a
// zero-extending w->x move.
11159std::optional<DestSourcePair>
11161
11162 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
11163 // and zero immediate operands used as an alias for mov instruction.
11164 if (((MI.getOpcode() == AArch64::ORRWrs &&
11165 MI.getOperand(1).getReg() == AArch64::WZR &&
11166 MI.getOperand(3).getImm() == 0x0) ||
11167 (MI.getOpcode() == AArch64::ORRWrr &&
11168 MI.getOperand(1).getReg() == AArch64::WZR)) &&
11169 // Check that the w->w move is not a zero-extending w->x mov.
// Virtual destination: it must not be written through a sub-register index.
11170 (!MI.getOperand(0).getReg().isVirtual() ||
11171 MI.getOperand(0).getSubReg() == 0) &&
// Physical destination: MI must not also define the corresponding X register.
11172 (!MI.getOperand(0).getReg().isPhysical() ||
11173 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
11174 /*TRI=*/nullptr) == -1))
11175 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11176
// 64-bit form: ORRXrs with XZR as first source and a zero shift immediate.
11177 if (MI.getOpcode() == AArch64::ORRXrs &&
11178 MI.getOperand(1).getReg() == AArch64::XZR &&
11179 MI.getOperand(3).getImm() == 0x0)
11180 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11181
11182 return std::nullopt;
11183}
11184
// NOTE(review): listing line 11186 (the declarator with the function name and
// the MI parameter) is absent from this listing; presumably this is the
// "copy-like" recognizer -- confirm against the upstream source.
//
// Returns the {destination, source} operand pair when MI is ORRWrs with WZR as
// first source and a zero shift immediate, or ORRWrr with WZR as first source;
// std::nullopt otherwise. No zero-extension checks are performed here.
11185std::optional<DestSourcePair>
11187 if ((MI.getOpcode() == AArch64::ORRWrs &&
11188 MI.getOperand(1).getReg() == AArch64::WZR &&
11189 MI.getOperand(3).getImm() == 0x0) ||
11190 (MI.getOpcode() == AArch64::ORRWrr &&
11191 MI.getOperand(1).getReg() == AArch64::WZR))
11192 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11193 return std::nullopt;
11194}
11195
11196std::optional<RegImmPair>
11197AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
11198 int Sign = 1;
11199 int64_t Offset = 0;
11200
11201 // TODO: Handle cases where Reg is a super- or sub-register of the
11202 // destination register.
11203 const MachineOperand &Op0 = MI.getOperand(0);
11204 if (!Op0.isReg() || Reg != Op0.getReg())
11205 return std::nullopt;
11206
11207 switch (MI.getOpcode()) {
11208 default:
11209 return std::nullopt;
11210 case AArch64::SUBWri:
11211 case AArch64::SUBXri:
11212 case AArch64::SUBSWri:
11213 case AArch64::SUBSXri:
11214 Sign *= -1;
11215 [[fallthrough]];
11216 case AArch64::ADDSWri:
11217 case AArch64::ADDSXri:
11218 case AArch64::ADDWri:
11219 case AArch64::ADDXri: {
11220 // TODO: Third operand can be global address (usually some string).
11221 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
11222 !MI.getOperand(2).isImm())
11223 return std::nullopt;
11224 int Shift = MI.getOperand(3).getImm();
11225 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
11226 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
11227 }
11228 }
11229 return RegImmPair{MI.getOperand(1).getReg(), Offset};
11230}
11231
11232/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
11233/// the destination register then, if possible, describe the value in terms of
11234/// the source register.
///
/// Returns std::nullopt when \p TII does not recognise MI as copy-like, when
/// either copy register is invalid, or when \p DescribedReg does not overlap
/// the destination.
// NOTE(review): listing line 11236 (the function name and leading parameters,
// presumably MI and DescribedReg as used below) is absent from this listing.
// describeLoadedValue in this file calls describeORRLoadedValue(MI, Reg, this,
// TRI), which matches the parameters visible here.
11235static std::optional<ParamLoadedValue>
11237 const TargetInstrInfo *TII,
11238 const TargetRegisterInfo *TRI) {
11239 auto DestSrc = TII->isCopyLikeInstr(MI);
11240 if (!DestSrc)
11241 return std::nullopt;
11242
11243 Register DestReg = DestSrc->Destination->getReg();
11244 Register SrcReg = DestSrc->Source->getReg();
11245
11246 if (!DestReg.isValid() || !SrcReg.isValid())
11247 return std::nullopt;
11248
// An empty expression: the described value is the source register itself.
11249 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
11250
11251 // If the described register is the destination, just return the source.
11252 if (DestReg == DescribedReg)
11253 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11254
11255 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
11256 if (MI.getOpcode() == AArch64::ORRWrs &&
11257 TRI->isSuperRegister(DestReg, DescribedReg))
11258 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11259
11260 // We may need to describe the lower part of a ORRXrs move.
11261 if (MI.getOpcode() == AArch64::ORRXrs &&
11262 TRI->isSubRegister(DestReg, DescribedReg)) {
11263 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
11264 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
11265 }
11266
// Any remaining overlap between DestReg and DescribedReg would be a case the
// branches above failed to handle.
11267 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
11268 "Unhandled ORR[XW]rs copy case");
11269
11270 return std::nullopt;
11271}
11272
/// Decide whether MF may be split across sections. A function that has a red
/// zone, or for which the red-zone status is unknown (treated as "has one" via
/// value_or(true)), is never safe to split.
11273bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
11274 // Functions cannot be split to different sections on AArch64 if they have
11275 // a red zone. This is because relaxing a cross-section branch may require
11276 // incrementing the stack pointer to spill a register, which would overwrite
11277 // the red zone.
11278 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
11279 return false;
11280
// NOTE(review): listing line 11281 (the final return for the no-red-zone case)
// is absent from this listing; presumably it defers to the generic
// implementation -- confirm against the upstream source.
11282}
11283
11284bool AArch64InstrInfo::isMBBSafeToSplitToCold(
11285 const MachineBasicBlock &MBB) const {
11286 // Asm Goto blocks can contain conditional branches to goto labels, which can
11287 // get moved out of range of the branch instruction.
11288 auto isAsmGoto = [](const MachineInstr &MI) {
11289 return MI.getOpcode() == AArch64::INLINEASM_BR;
11290 };
11291 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
11292 return false;
11293
11294 // Because jump tables are label-relative instead of table-relative, they all
11295 // must be in the same section or relocation fixup handling will fail.
11296
11297 // Check if MBB is a jump table target
11298 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
11299 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
11300 return llvm::is_contained(JTE.MBBs, &MBB);
11301 };
11302 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
11303 return false;
11304
11305 // Check if MBB contains a jump table lookup
11306 for (const MachineInstr &MI : MBB) {
11307 switch (MI.getOpcode()) {
11308 case TargetOpcode::G_BRJT:
11309 case AArch64::JumpTableDest32:
11310 case AArch64::JumpTableDest16:
11311 case AArch64::JumpTableDest8:
11312 return false;
11313 default:
11314 continue;
11315 }
11316 }
11317
11318 // MBB isn't a special case, so it's safe to be split to the cold section.
11319 return true;
11320}
11321
/// Describe the value that MI loads into Reg, for the opcodes handled here:
///  - MOVZWi / MOVZXi: the immediate shifted left by the shift operand, as an
///    immediate operand with a null expression;
///  - ORRWrs / ORRXrs: delegated to describeORRLoadedValue().
11322std::optional<ParamLoadedValue>
11323AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11324 Register Reg) const {
11325 const MachineFunction *MF = MI.getMF();
11326 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11327 switch (MI.getOpcode()) {
11328 case AArch64::MOVZWi:
11329 case AArch64::MOVZXi: {
11330 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11331 // 64-bit parameters, so we need to consider super-registers.
11332 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11333 return std::nullopt;
11334
// A non-immediate second operand cannot be described as a constant.
11335 if (!MI.getOperand(1).isImm())
11336 return std::nullopt;
11337 int64_t Immediate = MI.getOperand(1).getImm();
11338 int Shift = MI.getOperand(2).getImm();
11339 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11340 nullptr);
11341 }
11342 case AArch64::ORRWrs:
11343 case AArch64::ORRXrs:
11344 return describeORRLoadedValue(MI, Reg, this, TRI);
11345 }
11346
// NOTE(review): listing line 11347 (the return for all other opcodes) is
// absent from this listing; presumably it defers to the generic
// implementation -- confirm against the upstream source.
11348}
11349
11350bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11351 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11352 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11353 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11354 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11355
11356 // Anyexts are nops.
11357 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11358 return true;
11359
11360 Register DefReg = ExtMI.getOperand(0).getReg();
11361 if (!MRI.hasOneNonDBGUse(DefReg))
11362 return false;
11363
11364 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11365 // addressing mode.
11366 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11367 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11368}
11369
11370uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11371 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11372}
11373
11374bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11375 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11376}
11377
11378bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11379 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11380}
11381
11382unsigned int
11383AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11384 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11385}
11386
11387bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11388 unsigned Scale) const {
11389 if (Offset && Scale)
11390 return false;
11391
11392 // Check Reg + Imm
11393 if (!Scale) {
11394 // 9-bit signed offset
11395 if (isInt<9>(Offset))
11396 return true;
11397
11398 // 12-bit unsigned offset
11399 unsigned Shift = Log2_64(NumBytes);
11400 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11401 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11402 (Offset >> Shift) << Shift == Offset)
11403 return true;
11404 return false;
11405 }
11406
11407 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11408 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11409}
11410
// NOTE(review): listing line 11411 (the signature, which presumably takes the
// MachineFunction `MF` used below) is absent from this listing.
//
// Selects the indirect-call opcode: BLRNoIP when the subtarget reports
// hardenSlsBlr(), plain BLR otherwise.
11412 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11413 return AArch64::BLRNoIP;
11414 else
11415 return AArch64::BLR;
11416}
11417
// NOTE(review): listing line 11418 (the start of the signature, which
// presumably declares `MBB`) is absent from this listing. Callers in this file
// invoke createPauthEpilogueInstr(MBB, DebugLoc()), which matches the
// parameters visible here -- confirm against the upstream source.
//
// Builds a PAUTH_EPILOGUE pseudo before the first terminator of MBB and then
// attaches implicit register definitions to it:
//  - if the frame lowering reports argument stack to restore for this block:
//    X17 and X16, plus X15 when the subtarget has PAuthLR;
//  - otherwise, X16 when PAuthLR branch protection is requested but the
//    subtarget lacks PAuthLR.
11419 DebugLoc DL) const {
11420 MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
11421 auto Builder = BuildMI(MBB, InsertPt, DL, get(AArch64::PAUTH_EPILOGUE))
// NOTE(review): listing line 11422 (the rest of the BuildMI statement above)
// is absent from this listing.
11423
11424 MachineFunction &MF = *MBB.getParent();
11425 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
11426 auto &AFL = *static_cast<const AArch64FrameLowering *>(
11427 MF.getSubtarget().getFrameLowering());
11428 if (AFL.getArgumentStackToRestore(MF, MBB)) {
11429 Builder.addReg(AArch64::X17, RegState::ImplicitDefine);
11430 Builder.addReg(AArch64::X16, RegState::ImplicitDefine);
11431 if (Subtarget.hasPAuthLR())
11432 Builder.addReg(AArch64::X15, RegState::ImplicitDefine);
11433 return;
11434 }
11435
11436 if (AFI->branchProtectionPAuthLR() && !Subtarget.hasPAuthLR())
11437 Builder.addReg(AArch64::X16, RegState::ImplicitDefine);
11438}
11439
/// Emit a stack-probing loop that lowers SP to TargetReg in steps of the
/// function's stack probe size, touching the stack at each step.
///
/// Three new blocks (LoopTest, LoopBody, Exit) are created and inserted right
/// after the block containing MBBI; the instructions following MBBI are moved
/// into Exit. The returned iterator is the beginning of the Exit block.
// NOTE(review): listing line 11440 (the return type) is absent from this
// listing; several statement continuations below are absent as well and are
// flagged individually.
11441AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
11442 Register TargetReg, bool FrameSetup) const {
11443 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11444
11445 MachineBasicBlock &MBB = *MBBI->getParent();
11446 MachineFunction &MF = *MBB.getParent();
11447 const AArch64InstrInfo *TII =
11448 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11449 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11450 DebugLoc DL = MBB.findDebugLoc(MBBI);
11451
// All three blocks are inserted before the block that currently follows MBB,
// in the order LoopTest, LoopBody, Exit.
11452 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11453 MachineBasicBlock *LoopTestMBB =
11454 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11455 MF.insert(MBBInsertPoint, LoopTestMBB);
11456 MachineBasicBlock *LoopBodyMBB =
11457 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11458 MF.insert(MBBInsertPoint, LoopBodyMBB);
11459 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11460 MF.insert(MBBInsertPoint, ExitMBB);
11461 MachineInstr::MIFlag Flags =
// NOTE(review): listing line 11462 (the initializer of Flags, presumably
// derived from the FrameSetup parameter) is absent from this listing.
11463
11464 // LoopTest:
11465 // SUB SP, SP, #ProbeSize
11466 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11467 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11468
11469 // CMP SP, TargetReg
11470 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11471 AArch64::XZR)
11472 .addReg(AArch64::SP)
11473 .addReg(TargetReg)
// NOTE(review): listing line 11474 (one more operand of this compare) is
// absent from this listing.
11475 .setMIFlags(Flags);
11476
11477 // B.<Cond> LoopExit
11478 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
// NOTE(review): listing line 11479 (the condition-code operand) is absent from
// this listing.
11480 .addMBB(ExitMBB)
11481 .setMIFlags(Flags);
11482
11483 // LDR XZR, [SP]
11484 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::LDRXui))
11485 .addDef(AArch64::XZR)
11486 .addReg(AArch64::SP)
11487 .addImm(0)
// NOTE(review): listing lines 11488-11490 are absent; judging by the trailing
// `Align(8)))` they attach a memory operand to this load.
11491 Align(8)))
11492 .setMIFlags(Flags);
11493
11494 // B loop
11495 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11496 .addMBB(LoopTestMBB)
11497 .setMIFlags(Flags);
11498
11499 // LoopExit:
11500 // MOV SP, TargetReg
11501 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11502 .addReg(TargetReg)
11503 .addImm(0)
// NOTE(review): listing line 11504 (one more operand of this ADDXri) is absent
// from this listing.
11505 .setMIFlags(Flags);
11506
11507 // LDR XZR, [SP]
11508 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11509 .addReg(AArch64::XZR, RegState::Define)
11510 .addReg(AArch64::SP)
11511 .addImm(0)
11512 .setMIFlags(Flags);
11513
// Everything that followed MBBI in the original block now lives in Exit.
11514 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
// NOTE(review): listing line 11515 is absent; presumably it updates Exit's
// successor/PHI information after the splice -- confirm against upstream.
11516
11517 LoopTestMBB->addSuccessor(ExitMBB);
11518 LoopTestMBB->addSuccessor(LoopBodyMBB);
11519 LoopBodyMBB->addSuccessor(LoopTestMBB);
11520 MBB.addSuccessor(LoopTestMBB);
11521
11522 // Update liveins.
11523 if (MF.getRegInfo().reservedRegsFrozen())
11524 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11525
11526 return ExitMBB->begin();
11527}
11528
11529namespace {
/// Loop description handed to the software pipeliner for a single-block loop.
/// It records the loop's conditional branch, the compare that controls it, the
/// instruction that updates the loop counter, and the counter's initial value,
/// and uses them to build trip-count conditions.
11530class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
// Cached from the function that contains Comp (see the constructor).
11531 MachineFunction *MF;
11532 const TargetInstrInfo *TII;
11533 const TargetRegisterInfo *TRI;
11534 MachineRegisterInfo &MRI;
11535
11536 /// The block of the loop
11537 MachineBasicBlock *LoopBB;
11538 /// The conditional branch of the loop
11539 MachineInstr *CondBranch;
11540 /// The compare instruction for loop control
11541 MachineInstr *Comp;
11542 /// The number of the operand of the loop counter value in Comp
11543 unsigned CompCounterOprNum;
11544 /// The instruction that updates the loop counter value
11545 MachineInstr *Update;
11546 /// The number of the operand of the loop counter value in Update
11547 unsigned UpdateCounterOprNum;
11548 /// The initial value of the loop counter
11549 Register Init;
11550 /// True iff Update is a predecessor of Comp
11551 bool IsUpdatePriorComp;
11552
11553 /// The normalized condition used by createTripCountGreaterCondition()
// NOTE(review): listing line 11554 is absent; the `Cond` member initialised in
// the constructor and read in createTripCountGreaterCondition() is presumably
// declared there.
11555
11556public:
/// Stores the given loop-control description; the machine function and its
/// target info are taken from the function that contains \p Comp, and \p Cond
/// is copied.
11557 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11558 MachineInstr *Comp, unsigned CompCounterOprNum,
11559 MachineInstr *Update, unsigned UpdateCounterOprNum,
11560 Register Init, bool IsUpdatePriorComp,
11561 const SmallVectorImpl<MachineOperand> &Cond)
11562 : MF(Comp->getParent()->getParent()),
11563 TII(MF->getSubtarget().getInstrInfo()),
11564 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11565 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11566 CompCounterOprNum(CompCounterOprNum), Update(Update),
11567 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11568 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11569
/// Only the loop-control compare is ignored for pipelining.
11570 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11571 // Make the instructions for loop control be placed in stage 0.
11572 // The predecessors of Comp are considered by the caller.
11573 return MI == Comp;
11574 }
11575
/// Hands back the stored normalized condition in \p CondParam and returns an
/// empty optional; \p TC and \p MBB are not used.
11576 std::optional<bool> createTripCountGreaterCondition(
11577 int TC, MachineBasicBlock &MBB,
11578 SmallVectorImpl<MachineOperand> &CondParam) override {
11579 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11580 // Cond is normalized for such use.
11581 // The predecessors of the branch are assumed to have already been inserted.
11582 CondParam = Cond;
11583 return {};
11584 }
11585
/// Defined out of line below.
11586 void createRemainingIterationsGreaterCondition(
11587 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11588 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11589
/// Intentionally a no-op.
11590 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11591
/// Intentionally a no-op.
11592 void adjustTripCount(int TripCountAdjust) override {}
11593
11594 bool isMVEExpanderSupported() override { return true; }
11595};
11596} // namespace
11597
11598/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11599/// is replaced by ReplaceReg. The output register is newly created.
11600/// The other operands are unchanged from MI.
11601static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11602 Register ReplaceReg, MachineBasicBlock &MBB,
11603 MachineBasicBlock::iterator InsertTo) {
11604 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11605 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11606 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11607 Register Result = 0;
11608 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11609 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11610 Result = MRI.createVirtualRegister(
11611 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11612 NewMI->getOperand(I).setReg(Result);
11613 } else if (I == ReplaceOprNum) {
11614 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11615 NewMI->getOperand(I).setReg(ReplaceReg);
11616 }
11617 }
11618 MBB.insert(InsertTo, NewMI);
11619 return Result;
11620}
11621
/// Append to MBB a sequence that tests whether more than TC iterations remain.
///
/// The loop's update and compare instructions are cloned TC times (see the
/// example below), each compare result is accumulated into a counter via
/// CSINCXr, and a final SUBSXri against zero sets the flags from the
/// accumulated value. Three situations are distinguished through
/// LastStage0Insts: MBB is the unrolled kernel (the first compare/update
/// already exist in MBB), LastStage0Insts is empty (start from the initial
/// counter value), or MBB is an epilogue (start from the counter used by the
/// kernel's compare).
// NOTE(review): listing lines 11623-11624 (the parameter list; the class
// declaration shows TC, MBB, Cond and LastStage0Insts) are absent from this
// listing.
11622void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11625 // Create and accumulate conditions for next TC iterations.
11626 // Example:
11627 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11628 // # iteration of the kernel
11629 //
11630 // # insert the following instructions
11631 // cond = CSINCXr 0, 0, C, implicit $nzcv
11632 // counter = ADDXri counter, 1 # clone from this->Update
11633 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11634 // cond = CSINCXr cond, cond, C, implicit $nzcv
11635 // ... (repeat TC times)
11636 // SUBSXri cond, 0, implicit-def $nzcv
11637
11638 assert(CondBranch->getOpcode() == AArch64::Bcc);
11639 // CondCode to exit the loop
// NOTE(review): listing line 11640 (presumably the declaration of `CC`, which
// is used below) is absent from this listing.
11641 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11642 if (CondBranch->getOperand(1).getMBB() == LoopBB)
// NOTE(review): listing line 11643 (the statement guarded by the `if` above)
// is absent; presumably it inverts CC when the branch targets the loop itself.
11644
11645 // Accumulate conditions to exit the loop
11646 Register AccCond = AArch64::XZR;
11647
11648 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11649 auto AccumulateCond = [&](Register CurCond,
// NOTE(review): listing line 11650 (the rest of the lambda's parameter list)
// is absent from this listing.
11651 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11652 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11653 .addReg(NewCond, RegState::Define)
11654 .addReg(CurCond)
11655 .addReg(CurCond)
// NOTE(review): listing line 11656 (the condition-code operand that ends this
// statement) is absent from this listing.
11657 return NewCond;
11658 };
11659
11660 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11661 // Update and Comp for I==0 already exist in MBB
11662 // (MBB is an unrolled kernel)
11663 Register Counter;
11664 for (int I = 0; I <= TC; ++I) {
11665 Register NextCounter;
11666 if (I != 0)
11667 NextCounter =
11668 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11669
11670 AccCond = AccumulateCond(AccCond, CC);
11671
11672 if (I != TC) {
11673 if (I == 0) {
11674 if (Update != Comp && IsUpdatePriorComp) {
11675 Counter =
11676 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11677 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11678 MBB.end());
11679 } else {
11680 // can use already calculated value
11681 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11682 }
11683 } else if (Update != Comp) {
11684 NextCounter =
11685 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11686 }
11687 }
11688 Counter = NextCounter;
11689 }
11690 } else {
11691 Register Counter;
11692 if (LastStage0Insts.empty()) {
11693 // use initial counter value (testing if the trip count is sufficient to
11694 // be executed by pipelined code)
11695 Counter = Init;
11696 if (IsUpdatePriorComp)
11697 Counter =
11698 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11699 } else {
11700 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11701 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11702 }
11703
// Clone a compare (and, except after the last one, an update) per iteration.
11704 for (int I = 0; I <= TC; ++I) {
11705 Register NextCounter;
11706 NextCounter =
11707 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11708 AccCond = AccumulateCond(AccCond, CC);
11709 if (I != TC && Update != Comp)
11710 NextCounter =
11711 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11712 Counter = NextCounter;
11713 }
11714 }
11715
11716 // If AccCond == 0, the remainder is greater than TC.
11717 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11718 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11719 .addReg(AccCond)
11720 .addImm(0)
11721 .addImm(0);
11722 Cond.clear();
// NOTE(review): listing line 11723 is absent; presumably it stores the
// resulting branch condition into Cond after the clear() above.
11724}
11725
11726static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11727 Register &RegMBB, Register &RegOther) {
11728 assert(Phi.getNumOperands() == 5);
11729 if (Phi.getOperand(2).getMBB() == MBB) {
11730 RegMBB = Phi.getOperand(1).getReg();
11731 RegOther = Phi.getOperand(3).getReg();
11732 } else {
11733 assert(Phi.getOperand(4).getMBB() == MBB);
11734 RegMBB = Phi.getOperand(3).getReg();
11735 RegOther = Phi.getOperand(1).getReg();
11736 }
11737}
11738
// Body of isDefinedOutside(Reg, BB).
// NOTE(review): the signature line (listing line 11739) is absent from this
// extract; confirm it against the original file.
// A register that is not virtual is never reported as defined outside BB.
11740 if (!Reg.isVirtual())
11741 return false;
11742 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
// True when the instruction defining Reg lives in a block other than BB.
11743 return MRI.getVRegDef(Reg)->getParent() != BB;
11744}
11745
11746/// If Reg is an induction variable, return true and set some parameters
11747static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11748 MachineInstr *&UpdateInst,
11749 unsigned &UpdateCounterOprNum, Register &InitReg,
11750 bool &IsUpdatePriorComp) {
11751 // Example:
11752 //
11753 // Preheader:
11754 // InitReg = ...
11755 // LoopBB:
11756 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11757 // Reg = COPY Reg0 ; COPY is ignored.
11758 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11759 // ; Reg is the value calculated in the previous
11760 // ; iteration, so IsUpdatePriorComp == false.
11761
11762 if (LoopBB->pred_size() != 2)
11763 return false;
11764 if (!Reg.isVirtual())
11765 return false;
11766 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11767 UpdateInst = nullptr;
11768 UpdateCounterOprNum = 0;
11769 InitReg = 0;
11770 IsUpdatePriorComp = true;
11771 Register CurReg = Reg;
11772 while (true) {
11773 MachineInstr *Def = MRI.getVRegDef(CurReg);
11774 if (Def->getParent() != LoopBB)
11775 return false;
11776 if (Def->isCopy()) {
11777 // Ignore copy instructions unless they contain subregisters
11778 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11779 return false;
11780 CurReg = Def->getOperand(1).getReg();
11781 } else if (Def->isPHI()) {
11782 if (InitReg != 0)
11783 return false;
11784 if (!UpdateInst)
11785 IsUpdatePriorComp = false;
11786 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11787 } else {
11788 if (UpdateInst)
11789 return false;
11790 switch (Def->getOpcode()) {
11791 case AArch64::ADDSXri:
11792 case AArch64::ADDSWri:
11793 case AArch64::SUBSXri:
11794 case AArch64::SUBSWri:
11795 case AArch64::ADDXri:
11796 case AArch64::ADDWri:
11797 case AArch64::SUBXri:
11798 case AArch64::SUBWri:
11799 UpdateInst = Def;
11800 UpdateCounterOprNum = 1;
11801 break;
11802 case AArch64::ADDSXrr:
11803 case AArch64::ADDSWrr:
11804 case AArch64::SUBSXrr:
11805 case AArch64::SUBSWrr:
11806 case AArch64::ADDXrr:
11807 case AArch64::ADDWrr:
11808 case AArch64::SUBXrr:
11809 case AArch64::SUBWrr:
11810 UpdateInst = Def;
11811 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11812 UpdateCounterOprNum = 1;
11813 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11814 UpdateCounterOprNum = 2;
11815 else
11816 return false;
11817 break;
11818 default:
11819 return false;
11820 }
11821 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11822 }
11823
11824 if (!CurReg.isVirtual())
11825 return false;
11826 if (Reg == CurReg)
11827 break;
11828 }
11829
11830 if (!UpdateInst)
11831 return false;
11832
11833 return true;
11834}
11835
// Examines LoopBB and returns an AArch64PipelinerLoopInfo describing its
// conditional branch, compare and induction-variable update, or nullptr when
// any of the conditions listed below is not met.
// NOTE(review): listing lines 11837 (the signature), 11853, 11869 and 11876
// are absent from this extract, so the declaration of Cond and the statement
// guarded by `if (TBB == LoopBB)` are not visible here.
11836std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11838 // Accept loops that meet the following conditions
11839 // * The conditional branch is BCC
11840 // * The compare instruction is ADDS/SUBS/WHILEXX
11841 // * One operand of the compare is an induction variable and the other is a
11842 // loop invariant value
11843 // * The induction variable is incremented/decremented by a single instruction
11844 // * Does not contain CALL or instructions which have unmodeled side effects
11845
11846 for (MachineInstr &MI : *LoopBB)
11847 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11848 // This instruction may use NZCV, which interferes with the instruction to
11849 // be inserted for loop control.
11850 return nullptr;
11851
// Decode the block's terminators; a true result from analyzeBranch is treated
// as failure.
11852 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11854 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11855 return nullptr;
11856
11857 // Infinite loops are not supported
11858 if (TBB == LoopBB && FBB == LoopBB)
11859 return nullptr;
11860
11861 // Must be conditional branch
11862 if (TBB != LoopBB && FBB == nullptr)
11863 return nullptr;
11864
11865 assert((TBB == LoopBB || FBB == LoopBB) &&
11866 "The Loop must be a single-basic-block loop");
11867
11868 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11870
11871 if (CondBranch->getOpcode() != AArch64::Bcc)
11872 return nullptr;
11873
11874 // Normalization for createTripCountGreaterCondition()
11875 if (TBB == LoopBB)
11877
// Walk LoopBB backwards and stop at the first instruction that modifies NZCV,
// i.e. the last NZCV writer in the block; it must be one of the accepted
// compare forms.
11878 MachineInstr *Comp = nullptr;
11879 unsigned CompCounterOprNum = 0;
11880 for (MachineInstr &MI : reverse(*LoopBB)) {
11881 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11882 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11883 // operands is a loop invariant value
11884
11885 switch (MI.getOpcode()) {
11886 case AArch64::SUBSXri:
11887 case AArch64::SUBSWri:
11888 case AArch64::ADDSXri:
11889 case AArch64::ADDSWri:
11890 Comp = &MI;
11891 CompCounterOprNum = 1;
11892 break;
11893 case AArch64::ADDSWrr:
11894 case AArch64::ADDSXrr:
11895 case AArch64::SUBSWrr:
11896 case AArch64::SUBSXrr:
11897 Comp = &MI;
11898 break;
11899 default:
11900 if (isWhileOpcode(MI.getOpcode())) {
11901 Comp = &MI;
11902 break;
11903 }
11904 return nullptr;
11905 }
11906
// CompCounterOprNum is still 0 for the register-register and WHILE forms:
// the operand that is NOT defined outside LoopBB is taken as the counter.
11907 if (CompCounterOprNum == 0) {
11908 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11909 CompCounterOprNum = 2;
11910 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11911 CompCounterOprNum = 1;
11912 else
11913 return nullptr;
11914 }
11915 break;
11916 }
11917 }
11918 if (!Comp)
11919 return nullptr;
11920
// The counter operand must be recognised by getIndVarInfo, which also reports
// its update instruction, the update's counter operand index and the initial
// register.
11921 MachineInstr *Update = nullptr;
11922 Register Init;
11923 bool IsUpdatePriorComp;
11924 unsigned UpdateCounterOprNum;
11925 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11926 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11927 return nullptr;
11928
11929 return std::make_unique<AArch64PipelinerLoopInfo>(
11930 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11931 Init, IsUpdatePriorComp, Cond);
11932}
11933
11934/// verifyInstruction - Perform target specific instruction verification.
/// Returns true when MI passes every check below; otherwise returns false
/// with ErrInfo set to a description of the failing check.
11935bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11936 StringRef &ErrInfo) const {
11937 // Verify that immediate offsets on load/store instructions are within range.
11938 // Stack objects with an FI operand are excluded as they can be fixed up
11939 // during PEI.
11940 TypeSize Scale(0U, false), Width(0U, false);
11941 int64_t MinOffset, MaxOffset;
11942 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11943 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
// Only immediate operands are range-checked, and only when the operand just
// before the immediate is not a frame index.
11944 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11945 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11946 if (Imm < MinOffset || Imm > MaxOffset) {
11947 ErrInfo = "Unexpected immediate on load/store instruction";
11948 return false;
11949 }
11950 }
11951 }
11952
// Validate each described operand according to the operand type recorded in
// the instruction description.
// NOTE(review): the case labels (listing lines 11957 and 11963) and line
// 11965 are absent from this extract; the error strings suggest they are the
// OPERAND_IMPLICIT_IMM_0 and OPERAND_SHIFT_MSL operand types — confirm
// against the original file.
11953 const MCInstrDesc &MCID = MI.getDesc();
11954 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11955 const MachineOperand &MO = MI.getOperand(Op);
11956 switch (MCID.operands()[Op].OperandType) {
11958 if (!MO.isImm() || MO.getImm() != 0) {
11959 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11960 return false;
11961 }
11962 break;
11964 if (!MO.isImm() ||
11966 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11967 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11968 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11969 return false;
11970 }
11971 break;
11972 default:
11973 break;
11974 }
11975 }
11976 return true;
11977}
11978
11979#define GET_INSTRINFO_HELPERS
11980#define GET_INSTRMAP_INFO
11981#include "AArch64GenInstrInfo.inc"
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if in a streaming call site region without SME-FA64.
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static std::optional< unsigned > getLFIInstSizeInBytes(const MachineInstr &MI)
Return the maximum number of bytes of code the specified instruction may be after LFI rewriting.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool isInStreamingCallSiteRegion(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if the instruction at I is in a streaming call site region, within a single basic block.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, RegState State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
static bool isZExtLoad(const MachineInstr &MI)
Returns whether the instruction is a zero-extending load.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
void createPauthEpilogueInstr(MachineBasicBlock &MBB, DebugLoc DL) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSExtLoad(const MachineInstr &MI)
Returns whether the instruction is a sign-extending load.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:113
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:66
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:615
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:657
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:630
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:727
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
bool def_empty(Register RegNo) const
def_empty - Return true if there are no instructions defining the specified register (it may be live-...
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool empty() const
Definition SmallSet.h:169
bool erase(const T &V)
Definition SmallSet.h:200
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVAddr(unsigned Opcode, unsigned TargetFlags, bool IsTargetMachO, SmallVectorImpl< AddrInsnModel > &Insn)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
iterator end() const
Definition BasicBlock.h:89
LLVM_ABI Instruction & back() const
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
@ Renamable
Register that may be renamed.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static MCRegister getXRegFromWReg(MCRegister Reg)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
constexpr RegState getUndefRegState(bool B)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.