1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/GlobalValue.h"
46#include "llvm/IR/Module.h"
47#include "llvm/MC/MCAsmInfo.h"
48#include "llvm/MC/MCInst.h"
50#include "llvm/MC/MCInstrDesc.h"
55#include "llvm/Support/LEB128.h"
59#include <cassert>
60#include <cstdint>
61#include <iterator>
62#include <utility>
63
64using namespace llvm;
65
66#define GET_INSTRINFO_CTOR_DTOR
67#include "AArch64GenInstrInfo.inc"
68
69#define DEBUG_TYPE "AArch64InstrInfo"
70
71STATISTIC(NumCopyInstrs, "Number of COPY instructions expanded");
72STATISTIC(NumZCRegMoveInstrsGPR, "Number of zero-cycle GPR register move "
73 "instructions expanded from canonical COPY");
74STATISTIC(NumZCRegMoveInstrsFPR, "Number of zero-cycle FPR register move "
75 "instructions expanded from canonical COPY");
76STATISTIC(NumZCZeroingInstrsGPR, "Number of zero-cycle GPR zeroing "
77 "instructions expanded from canonical COPY");
78// NumZCZeroingInstrsFPR is counted at AArch64AsmPrinter
79
80static cl::opt<unsigned>
81 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
82 cl::desc("Restrict range of CB instructions (DEBUG)"));
83
84static cl::opt<unsigned> TBZDisplacementBits(
85 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
86 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
87
88static cl::opt<unsigned> CBZDisplacementBits(
89 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
90 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
91
92static cl::opt<unsigned>
93 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
94 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
95
96static cl::opt<unsigned>
97 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
98 cl::desc("Restrict range of B instructions (DEBUG)"));
99
101 "aarch64-search-limit", cl::Hidden, cl::init(2048),
102 cl::desc("Restrict range of instructions to search for the "
103 "machine-combiner gather pattern optimization"));
104
105AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
106 : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
107 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
108 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
109
110/// GetInstSize - Return the number of bytes of code the specified
111/// instruction may occupy. This returns the maximum number of bytes.
112unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
113 const MachineBasicBlock &MBB = *MI.getParent();
114 const MachineFunction *MF = MBB.getParent();
115 const Function &F = MF->getFunction();
116 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
117
118 {
119 auto Op = MI.getOpcode();
120 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
121 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
122 }
123
124 // Meta-instructions emit no code.
125 if (MI.isMetaInstruction())
126 return 0;
127
128 // FIXME: We currently only handle pseudoinstructions that don't get expanded
129 // before the assembly printer.
130 unsigned NumBytes = 0;
131 const MCInstrDesc &Desc = MI.getDesc();
132
133 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
134 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
135
136 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
137 if (!MFI->shouldSignReturnAddress(*MF))
138 return NumBytes;
139
140 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
141 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
142 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
143 return NumBytes;
144 }
145
146 // The size should preferably be set in
147 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
148 // Specific cases below handle instructions of variable size.
149 switch (Desc.getOpcode()) {
150 default:
151 if (Desc.getSize())
152 return Desc.getSize();
153
154 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
155 // with fixed constant size but not specified in .td file) is a normal
156 // 4-byte insn.
157 NumBytes = 4;
158 break;
159 case TargetOpcode::STACKMAP:
160 // The upper bound for a stackmap intrinsic is the full length of its shadow
161 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
162 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
163 break;
164 case TargetOpcode::PATCHPOINT:
165 // The size of the patchpoint intrinsic is the number of bytes requested
166 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
167 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
168 break;
169 case TargetOpcode::STATEPOINT:
170 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
171 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
172 // No patch bytes means a normal call inst is emitted
173 if (NumBytes == 0)
174 NumBytes = 4;
175 break;
176 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
177 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
178 // instructions are expanded to the specified number of NOPs. Otherwise,
179 // they are expanded to 36-byte XRay sleds.
180 NumBytes =
181 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
182 break;
183 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
184 case TargetOpcode::PATCHABLE_TAIL_CALL:
185 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
186 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
187 NumBytes = 36;
188 break;
189 case TargetOpcode::PATCHABLE_EVENT_CALL:
190 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
191 NumBytes = 24;
192 break;
193
194 case AArch64::SPACE:
195 NumBytes = MI.getOperand(1).getImm();
196 break;
197 case TargetOpcode::BUNDLE:
198 NumBytes = getInstBundleLength(MI);
199 break;
200 }
201
202 return NumBytes;
203}
204
205unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
206 unsigned Size = 0;
207 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
208 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
209 while (++I != E && I->isInsideBundle()) {
210 assert(!I->isBundle() && "No nested bundle!");
211 Size += getInstSizeInBytes(*I);
212 }
213 return Size;
214}
215
216static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
217 SmallVectorImpl<MachineOperand> &Cond) {
218 // Block ends with fall-through condbranch.
219 switch (LastInst->getOpcode()) {
220 default:
221 llvm_unreachable("Unknown branch instruction?");
222 case AArch64::Bcc:
223 Target = LastInst->getOperand(1).getMBB();
224 Cond.push_back(LastInst->getOperand(0));
225 break;
226 case AArch64::CBZW:
227 case AArch64::CBZX:
228 case AArch64::CBNZW:
229 case AArch64::CBNZX:
230 Target = LastInst->getOperand(1).getMBB();
231 Cond.push_back(MachineOperand::CreateImm(-1));
232 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
233 Cond.push_back(LastInst->getOperand(0));
234 break;
235 case AArch64::TBZW:
236 case AArch64::TBZX:
237 case AArch64::TBNZW:
238 case AArch64::TBNZX:
239 Target = LastInst->getOperand(2).getMBB();
240 Cond.push_back(MachineOperand::CreateImm(-1));
241 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
242 Cond.push_back(LastInst->getOperand(0));
243 Cond.push_back(LastInst->getOperand(1));
244 break;
245 case AArch64::CBWPri:
246 case AArch64::CBXPri:
247 case AArch64::CBWPrr:
248 case AArch64::CBXPrr:
249 Target = LastInst->getOperand(3).getMBB();
250 Cond.push_back(MachineOperand::CreateImm(-1));
251 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
252 Cond.push_back(LastInst->getOperand(0));
253 Cond.push_back(LastInst->getOperand(1));
254 Cond.push_back(LastInst->getOperand(2));
255 break;
256 case AArch64::CBBAssertExt:
257 case AArch64::CBHAssertExt:
258 Target = LastInst->getOperand(3).getMBB();
259 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
260 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
261 Cond.push_back(LastInst->getOperand(0)); // Cond
262 Cond.push_back(LastInst->getOperand(1)); // Op0
263 Cond.push_back(LastInst->getOperand(2)); // Op1
264 Cond.push_back(LastInst->getOperand(4)); // Ext0
265 Cond.push_back(LastInst->getOperand(5)); // Ext1
266 break;
267 }
268}
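// For reference, the Cond vectors built above take roughly these shapes:
//   Bcc:              { CC }
//   CB[N]Z[WX]:       { -1, Opcode, Reg }
//   TB[N]Z[WX]:       { -1, Opcode, Reg, BitImm }
//   CB[WX]P{ri,rr}:   { -1, Opcode, CC, Op0, Op1 }
//   CB[BH]AssertExt:  { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
// instantiateCondBranch() and insertSelect() below decode the same layout.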
269
270static unsigned getBranchDisplacementBits(unsigned Opc) {
271 switch (Opc) {
272 default:
273 llvm_unreachable("unexpected opcode!");
274 case AArch64::B:
275 return BDisplacementBits;
276 case AArch64::TBNZW:
277 case AArch64::TBZW:
278 case AArch64::TBNZX:
279 case AArch64::TBZX:
280 return TBZDisplacementBits;
281 case AArch64::CBNZW:
282 case AArch64::CBZW:
283 case AArch64::CBNZX:
284 case AArch64::CBZX:
285 return CBZDisplacementBits;
286 case AArch64::Bcc:
287 return BCCDisplacementBits;
288 case AArch64::CBWPri:
289 case AArch64::CBXPri:
290 case AArch64::CBBAssertExt:
291 case AArch64::CBHAssertExt:
292 case AArch64::CBWPrr:
293 case AArch64::CBXPrr:
294 return CBDisplacementBits;
295 }
296}
297
298bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
299 int64_t BrOffset) const {
300 unsigned Bits = getBranchDisplacementBits(BranchOp);
301 assert(Bits >= 3 && "max branch displacement must be enough to jump "
302 "over conditional branch expansion");
303 return isIntN(Bits, BrOffset / 4);
304}
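// With the default displacement widths above and the 4-byte instruction
// scaling, the reachable ranges are roughly: TB[N]Z about +/-32 KiB,
// CB[N]Z and Bcc about +/-1 MiB, and B about +/-128 MiB.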
305
306MachineBasicBlock *
307AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
308 switch (MI.getOpcode()) {
309 default:
310 llvm_unreachable("unexpected opcode!");
311 case AArch64::B:
312 return MI.getOperand(0).getMBB();
313 case AArch64::TBZW:
314 case AArch64::TBNZW:
315 case AArch64::TBZX:
316 case AArch64::TBNZX:
317 return MI.getOperand(2).getMBB();
318 case AArch64::CBZW:
319 case AArch64::CBNZW:
320 case AArch64::CBZX:
321 case AArch64::CBNZX:
322 case AArch64::Bcc:
323 return MI.getOperand(1).getMBB();
324 case AArch64::CBWPri:
325 case AArch64::CBXPri:
326 case AArch64::CBBAssertExt:
327 case AArch64::CBHAssertExt:
328 case AArch64::CBWPrr:
329 case AArch64::CBXPrr:
330 return MI.getOperand(3).getMBB();
331 }
332}
333
334void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
335 MachineBasicBlock &NewDestBB,
336 MachineBasicBlock &RestoreBB,
337 const DebugLoc &DL,
338 int64_t BrOffset,
339 RegScavenger *RS) const {
340 assert(RS && "RegScavenger required for long branching");
341 assert(MBB.empty() &&
342 "new block should be inserted for expanding unconditional branch");
343 assert(MBB.pred_size() == 1);
344 assert(RestoreBB.empty() &&
345 "restore block should be inserted for restoring clobbered registers");
346
347 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
348 // Offsets outside of the signed 33-bit range are not supported for ADRP +
349 // ADD.
350 if (!isInt<33>(BrOffset))
351 report_fatal_error(
352 "Branch offsets outside of the signed 33-bit range not supported");
353
354 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
355 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
356 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
357 .addReg(Reg)
358 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
359 .addImm(0);
360 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
361 };
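// The lambda above emits, roughly:
//   adrp xN, DestBB            ; page address of the destination
//   add  xN, xN, :lo12:DestBB  ; page offset
//   br   xN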
362
363 RS->enterBasicBlockEnd(MBB);
364 // If X16 is unused, we can rely on the linker to insert a range extension
365 // thunk if NewDestBB is out of range of a single B instruction.
366 constexpr Register Reg = AArch64::X16;
367 if (!RS->isRegUsed(Reg)) {
368 insertUnconditionalBranch(MBB, &NewDestBB, DL);
369 RS->setRegUsed(Reg);
370 return;
371 }
372
373 // If there's a free register and it's worth inflating the code size,
374 // manually insert the indirect branch.
375 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
376 if (Scavenged != AArch64::NoRegister &&
377 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
378 buildIndirectBranch(Scavenged, NewDestBB);
379 RS->setRegUsed(Scavenged);
380 return;
381 }
382
383 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
384 // with red zones.
385 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
386 if (!AFI || AFI->hasRedZone().value_or(true))
387 report_fatal_error(
388 "Unable to insert indirect branch inside function that has red zone");
389
390 // Otherwise, spill X16 and defer range extension to the linker.
391 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
392 .addReg(AArch64::SP, RegState::Define)
393 .addReg(Reg)
394 .addReg(AArch64::SP)
395 .addImm(-16);
396
397 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
398
399 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
400 .addReg(AArch64::SP, RegState::Define)
401 .addReg(Reg, RegState::Define)
402 .addReg(AArch64::SP)
403 .addImm(16);
404}
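// The spill/restore path above produces, roughly:
//   MBB:       str x16, [sp, #-16]!   ; free up x16 for a linker thunk
//              b   RestoreBB          ; linker may range-extend this branch
//   RestoreBB: ldr x16, [sp], #16     ; reload x16, then continue to NewDestBB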
405
406// Branch analysis.
407bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
408 MachineBasicBlock *&TBB,
409 MachineBasicBlock *&FBB,
410 SmallVectorImpl<MachineOperand> &Cond,
411 bool AllowModify) const {
412 // If the block has no terminators, it just falls into the block after it.
413 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
414 if (I == MBB.end())
415 return false;
416
417 // Skip over SpeculationBarrierEndBB terminators
418 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
419 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
420 --I;
421 }
422
423 if (!isUnpredicatedTerminator(*I))
424 return false;
425
426 // Get the last instruction in the block.
427 MachineInstr *LastInst = &*I;
428
429 // If there is only one terminator instruction, process it.
430 unsigned LastOpc = LastInst->getOpcode();
431 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
432 if (isUncondBranchOpcode(LastOpc)) {
433 TBB = LastInst->getOperand(0).getMBB();
434 return false;
435 }
436 if (isCondBranchOpcode(LastOpc)) {
437 // Block ends with fall-through condbranch.
438 parseCondBranch(LastInst, TBB, Cond);
439 return false;
440 }
441 return true; // Can't handle indirect branch.
442 }
443
444 // Get the instruction before it if it is a terminator.
445 MachineInstr *SecondLastInst = &*I;
446 unsigned SecondLastOpc = SecondLastInst->getOpcode();
447
448 // If AllowModify is true and the block ends with two or more unconditional
449 // branches, delete all but the first unconditional branch.
450 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
451 while (isUncondBranchOpcode(SecondLastOpc)) {
452 LastInst->eraseFromParent();
453 LastInst = SecondLastInst;
454 LastOpc = LastInst->getOpcode();
455 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
456 // Return now; the only terminator is an unconditional branch.
457 TBB = LastInst->getOperand(0).getMBB();
458 return false;
459 }
460 SecondLastInst = &*I;
461 SecondLastOpc = SecondLastInst->getOpcode();
462 }
463 }
464
465 // If we're allowed to modify and the block ends in an unconditional branch
466 // which could simply fallthrough, remove the branch. (Note: This case only
467 // matters when we can't understand the whole sequence, otherwise it's also
468 // handled by BranchFolding.cpp.)
469 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
470 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
471 LastInst->eraseFromParent();
472 LastInst = SecondLastInst;
473 LastOpc = LastInst->getOpcode();
474 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
475 assert(!isUncondBranchOpcode(LastOpc) &&
476 "unreachable unconditional branches removed above");
477
478 if (isCondBranchOpcode(LastOpc)) {
479 // Block ends with fall-through condbranch.
480 parseCondBranch(LastInst, TBB, Cond);
481 return false;
482 }
483 return true; // Can't handle indirect branch.
484 }
485 SecondLastInst = &*I;
486 SecondLastOpc = SecondLastInst->getOpcode();
487 }
488
489 // If there are three terminators, we don't know what sort of block this is.
490 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
491 return true;
492
493 // If the block ends with a B and a Bcc, handle it.
494 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
495 parseCondBranch(SecondLastInst, TBB, Cond);
496 FBB = LastInst->getOperand(0).getMBB();
497 return false;
498 }
499
500 // If the block ends with two unconditional branches, handle it. The second
501 // one is not executed, so remove it.
502 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
503 TBB = SecondLastInst->getOperand(0).getMBB();
504 I = LastInst;
505 if (AllowModify)
506 I->eraseFromParent();
507 return false;
508 }
509
510 // ...likewise if it ends with an indirect branch followed by an unconditional
511 // branch.
512 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
513 I = LastInst;
514 if (AllowModify)
515 I->eraseFromParent();
516 return true;
517 }
518
519 // Otherwise, can't handle this.
520 return true;
521}
522
523bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
524 MachineBranchPredicate &MBP,
525 bool AllowModify) const {
526 // For the moment, handle only a block which ends with a cb(n)zx followed by
527 // a fallthrough. Why this? Because it is a common form.
528 // TODO: Should we handle b.cc?
529
530 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
531 if (I == MBB.end())
532 return true;
533
534 // Skip over SpeculationBarrierEndBB terminators
535 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
536 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
537 --I;
538 }
539
540 if (!isUnpredicatedTerminator(*I))
541 return true;
542
543 // Get the last instruction in the block.
544 MachineInstr *LastInst = &*I;
545 unsigned LastOpc = LastInst->getOpcode();
546 if (!isCondBranchOpcode(LastOpc))
547 return true;
548
549 switch (LastOpc) {
550 default:
551 return true;
552 case AArch64::CBZW:
553 case AArch64::CBZX:
554 case AArch64::CBNZW:
555 case AArch64::CBNZX:
556 break;
557 };
558
559 MBP.TrueDest = LastInst->getOperand(1).getMBB();
560 assert(MBP.TrueDest && "expected!");
561 MBP.FalseDest = MBB.getNextNode();
562
563 MBP.ConditionDef = nullptr;
564 MBP.SingleUseCondition = false;
565
566 MBP.LHS = LastInst->getOperand(0);
567 MBP.RHS = MachineOperand::CreateImm(0);
568 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
569 ? MachineBranchPredicate::PRED_NE
570 : MachineBranchPredicate::PRED_EQ;
571 return false;
572}
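// Illustrative example: a block ending in "CBNZX %x0, %bb.3" that falls
// through to %bb.2 is reported as LHS = %x0, RHS = #0,
// Predicate = PRED_NE, TrueDest = %bb.3, FalseDest = %bb.2.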
573
574bool AArch64InstrInfo::reverseBranchCondition(
575 SmallVectorImpl<MachineOperand> &Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
579 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
580 } else {
581 // Folded compare-and-branch
582 switch (Cond[1].getImm()) {
583 default:
584 llvm_unreachable("Unknown conditional branch!");
585 case AArch64::CBZW:
586 Cond[1].setImm(AArch64::CBNZW);
587 break;
588 case AArch64::CBNZW:
589 Cond[1].setImm(AArch64::CBZW);
590 break;
591 case AArch64::CBZX:
592 Cond[1].setImm(AArch64::CBNZX);
593 break;
594 case AArch64::CBNZX:
595 Cond[1].setImm(AArch64::CBZX);
596 break;
597 case AArch64::TBZW:
598 Cond[1].setImm(AArch64::TBNZW);
599 break;
600 case AArch64::TBNZW:
601 Cond[1].setImm(AArch64::TBZW);
602 break;
603 case AArch64::TBZX:
604 Cond[1].setImm(AArch64::TBNZX);
605 break;
606 case AArch64::TBNZX:
607 Cond[1].setImm(AArch64::TBZX);
608 break;
609
610 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
611 case AArch64::CBWPri:
612 case AArch64::CBXPri:
613 case AArch64::CBBAssertExt:
614 case AArch64::CBHAssertExt:
615 case AArch64::CBWPrr:
616 case AArch64::CBXPrr: {
617 // Pseudos using standard 4-bit Arm condition codes
618 AArch64CC::CondCode CC =
619 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
620 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
621 }
622 }
623 }
624
625 return false;
626}
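// For example, a Bcc condition { NE } becomes { EQ }, and a folded
// compare-and-branch such as { -1, CBZX, Reg } becomes { -1, CBNZX, Reg }.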
627
628unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
629 int *BytesRemoved) const {
630 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
631 if (I == MBB.end())
632 return 0;
633
634 if (!isUncondBranchOpcode(I->getOpcode()) &&
635 !isCondBranchOpcode(I->getOpcode()))
636 return 0;
637
638 // Remove the branch.
639 I->eraseFromParent();
640
641 I = MBB.end();
642
643 if (I == MBB.begin()) {
644 if (BytesRemoved)
645 *BytesRemoved = 4;
646 return 1;
647 }
648 --I;
649 if (!isCondBranchOpcode(I->getOpcode())) {
650 if (BytesRemoved)
651 *BytesRemoved = 4;
652 return 1;
653 }
654
655 // Remove the branch.
656 I->eraseFromParent();
657 if (BytesRemoved)
658 *BytesRemoved = 8;
659
660 return 2;
661}
662
663void AArch64InstrInfo::instantiateCondBranch(
664 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
665 ArrayRef<MachineOperand> Cond) const {
666 if (Cond[0].getImm() != -1) {
667 // Regular Bcc
668 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
669 } else {
670 // Folded compare-and-branch
671 // Note that we use addOperand instead of addReg to keep the flags.
672
673 // cbz, cbnz
674 const MachineInstrBuilder MIB =
675 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
676
677 // tbz/tbnz
678 if (Cond.size() > 3)
679 MIB.add(Cond[3]);
680
681 // cb
682 if (Cond.size() > 4)
683 MIB.add(Cond[4]);
684
685 MIB.addMBB(TBB);
686
687 // cb[b,h]
688 if (Cond.size() > 5) {
689 MIB.addImm(Cond[5].getImm());
690 MIB.addImm(Cond[6].getImm());
691 }
692 }
693}
694
695unsigned AArch64InstrInfo::insertBranch(
696 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
697 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
698 // Shouldn't be a fall through.
699 assert(TBB && "insertBranch must not be told to insert a fallthrough");
700
701 if (!FBB) {
702 if (Cond.empty()) // Unconditional branch?
703 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
704 else
705 instantiateCondBranch(MBB, DL, TBB, Cond);
706
707 if (BytesAdded)
708 *BytesAdded = 4;
709
710 return 1;
711 }
712
713 // Two-way conditional branch.
714 instantiateCondBranch(MBB, DL, TBB, Cond);
715 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
716
717 if (BytesAdded)
718 *BytesAdded = 8;
719
720 return 2;
721}
722
723static bool optimizeTerminators(MachineBasicBlock *MBB,
724 const TargetInstrInfo &TII) {
725 for (MachineInstr &MI : MBB->terminators()) {
726 unsigned Opc = MI.getOpcode();
727 switch (Opc) {
728 case AArch64::CBZW:
729 case AArch64::CBZX:
730 case AArch64::TBZW:
731 case AArch64::TBZX:
732 // CBZ/TBZ with WZR/XZR -> unconditional B
733 if (MI.getOperand(0).getReg() == AArch64::WZR ||
734 MI.getOperand(0).getReg() == AArch64::XZR) {
735 DEBUG_WITH_TYPE("optimizeTerminators",
736 dbgs() << "Removing always taken branch: " << MI);
737 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
738 SmallVector<MachineBasicBlock *> Succs(MBB->successors());
739 for (auto *S : Succs)
740 if (S != Target)
741 MBB->removeSuccessor(S);
742 DebugLoc DL = MI.getDebugLoc();
743 while (MBB->rbegin() != &MI)
744 MBB->rbegin()->eraseFromParent();
745 MI.eraseFromParent();
746 BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
747 return true;
748 }
749 break;
750 case AArch64::CBNZW:
751 case AArch64::CBNZX:
752 case AArch64::TBNZW:
753 case AArch64::TBNZX:
754 // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
755 if (MI.getOperand(0).getReg() == AArch64::WZR ||
756 MI.getOperand(0).getReg() == AArch64::XZR) {
757 DEBUG_WITH_TYPE("optimizeTerminators",
758 dbgs() << "Removing never taken branch: " << MI);
759 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
760 MI.getParent()->removeSuccessor(Target);
761 MI.eraseFromParent();
762 return true;
763 }
764 break;
765 }
766 }
767 return false;
768}
769
770// Find the original register that VReg is copied from.
771static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
772 while (Register::isVirtualRegister(VReg)) {
773 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
774 if (!DefMI->isFullCopy())
775 return VReg;
776 VReg = DefMI->getOperand(1).getReg();
777 }
778 return VReg;
779}
780
781// Determine if VReg is defined by an instruction that can be folded into a
782// csel instruction. If so, return the folded opcode, and the replacement
783// register.
784static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
785 unsigned *NewReg = nullptr) {
786 VReg = removeCopies(MRI, VReg);
787 if (!Register::isVirtualRegister(VReg))
788 return 0;
789
790 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
791 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
792 unsigned Opc = 0;
793 unsigned SrcReg = 0;
794 switch (DefMI->getOpcode()) {
795 case AArch64::SUBREG_TO_REG:
796 // Check for the following way to define a 64-bit immediate:
797 // %0:gpr32 = MOVi32imm 1
798 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
799 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
800 return 0;
801 if (!DefMI->getOperand(2).isReg())
802 return 0;
803 if (!DefMI->getOperand(3).isImm() ||
804 DefMI->getOperand(3).getImm() != AArch64::sub_32)
805 return 0;
806 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
807 if (DefMI->getOpcode() != AArch64::MOVi32imm)
808 return 0;
809 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
810 return 0;
811 assert(Is64Bit);
812 SrcReg = AArch64::XZR;
813 Opc = AArch64::CSINCXr;
814 break;
815
816 case AArch64::MOVi32imm:
817 case AArch64::MOVi64imm:
818 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
819 return 0;
820 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
821 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
822 break;
823
824 case AArch64::ADDSXri:
825 case AArch64::ADDSWri:
826 // if NZCV is used, do not fold.
827 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
828 true) == -1)
829 return 0;
830 // fall-through to ADDXri and ADDWri.
831 [[fallthrough]];
832 case AArch64::ADDXri:
833 case AArch64::ADDWri:
834 // add x, 1 -> csinc.
835 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
836 DefMI->getOperand(3).getImm() != 0)
837 return 0;
838 SrcReg = DefMI->getOperand(1).getReg();
839 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
840 break;
841
842 case AArch64::ORNXrr:
843 case AArch64::ORNWrr: {
844 // not x -> csinv, represented as orn dst, xzr, src.
845 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
846 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
847 return 0;
848 SrcReg = DefMI->getOperand(2).getReg();
849 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
850 break;
851 }
852
853 case AArch64::SUBSXrr:
854 case AArch64::SUBSWrr:
855 // if NZCV is used, do not fold.
856 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
857 true) == -1)
858 return 0;
859 // fall-through to SUBXrr and SUBWrr.
860 [[fallthrough]];
861 case AArch64::SUBXrr:
862 case AArch64::SUBWrr: {
863 // neg x -> csneg, represented as sub dst, xzr, src.
864 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
865 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
866 return 0;
867 SrcReg = DefMI->getOperand(2).getReg();
868 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
869 break;
870 }
871 default:
872 return 0;
873 }
874 assert(Opc && SrcReg && "Missing parameters");
875
876 if (NewReg)
877 *NewReg = SrcReg;
878 return Opc;
879}
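// In summary, the patterns folded above are roughly:
//   materialize #1 (mov/add #1)   -> CSINC
//   bitwise not    (orn zr, src)  -> CSINV (~src)
//   negate         (sub zr, src)  -> CSNEG (-src)
// with the returned NewReg giving the operand the cs* instruction should use.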
880
881bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
882 ArrayRef<MachineOperand> Cond,
883 Register DstReg, Register TrueReg,
884 Register FalseReg, int &CondCycles,
885 int &TrueCycles,
886 int &FalseCycles) const {
887 // Check register classes.
888 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
889 const TargetRegisterClass *RC =
890 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
891 if (!RC)
892 return false;
893
894 // Also need to check the dest regclass, in case we're trying to optimize
895 // something like:
896 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
897 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
898 return false;
899
900 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
901 unsigned ExtraCondLat = Cond.size() != 1;
902
903 // GPRs are handled by csel.
904 // FIXME: Fold in x+1, -x, and ~x when applicable.
905 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
906 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
907 // Single-cycle csel, csinc, csinv, and csneg.
908 CondCycles = 1 + ExtraCondLat;
909 TrueCycles = FalseCycles = 1;
910 if (canFoldIntoCSel(MRI, TrueReg))
911 TrueCycles = 0;
912 else if (canFoldIntoCSel(MRI, FalseReg))
913 FalseCycles = 0;
914 return true;
915 }
916
917 // Scalar floating point is handled by fcsel.
918 // FIXME: Form fabs, fmin, and fmax when applicable.
919 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
920 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
921 CondCycles = 5 + ExtraCondLat;
922 TrueCycles = FalseCycles = 2;
923 return true;
924 }
925
926 // Can't do vectors.
927 return false;
928}
929
930void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
931 MachineBasicBlock::iterator I,
932 const DebugLoc &DL, Register DstReg,
933 ArrayRef<MachineOperand> Cond,
934 Register TrueReg, Register FalseReg) const {
935 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
936
937 // Parse the condition code, see parseCondBranch() above.
938 AArch64CC::CondCode CC;
939 switch (Cond.size()) {
940 default:
941 llvm_unreachable("Unknown condition opcode in Cond");
942 case 1: // b.cc
943 CC = AArch64CC::CondCode(Cond[0].getImm());
944 break;
945 case 3: { // cbz/cbnz
946 // We must insert a compare against 0.
947 bool Is64Bit;
948 switch (Cond[1].getImm()) {
949 default:
950 llvm_unreachable("Unknown branch opcode in Cond");
951 case AArch64::CBZW:
952 Is64Bit = false;
953 CC = AArch64CC::EQ;
954 break;
955 case AArch64::CBZX:
956 Is64Bit = true;
957 CC = AArch64CC::EQ;
958 break;
959 case AArch64::CBNZW:
960 Is64Bit = false;
961 CC = AArch64CC::NE;
962 break;
963 case AArch64::CBNZX:
964 Is64Bit = true;
965 CC = AArch64CC::NE;
966 break;
967 }
968 Register SrcReg = Cond[2].getReg();
969 if (Is64Bit) {
970 // cmp reg, #0 is actually subs xzr, reg, #0.
971 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
972 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
973 .addReg(SrcReg)
974 .addImm(0)
975 .addImm(0);
976 } else {
977 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
978 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
979 .addReg(SrcReg)
980 .addImm(0)
981 .addImm(0);
982 }
983 break;
984 }
985 case 4: { // tbz/tbnz
986 // We must insert a tst instruction.
987 switch (Cond[1].getImm()) {
988 default:
989 llvm_unreachable("Unknown branch opcode in Cond");
990 case AArch64::TBZW:
991 case AArch64::TBZX:
992 CC = AArch64CC::EQ;
993 break;
994 case AArch64::TBNZW:
995 case AArch64::TBNZX:
996 CC = AArch64CC::NE;
997 break;
998 }
999 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
1000 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
1001 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
1002 .addReg(Cond[2].getReg())
1003 .addImm(
1004 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
1005 else
1006 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
1007 .addReg(Cond[2].getReg())
1008 .addImm(
1009 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
1010 break;
1011 }
1012 case 5: { // cb
1013 // We must insert a cmp, that is, a subs
1014 // 0 1 2 3 4
1015 // Cond is { -1, Opcode, CC, Op0, Op1 }
1016
1017 unsigned SubsOpc, SubsDestReg;
1018 bool IsImm = false;
1019 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1020 switch (Cond[1].getImm()) {
1021 default:
1022 llvm_unreachable("Unknown branch opcode in Cond");
1023 case AArch64::CBWPri:
1024 SubsOpc = AArch64::SUBSWri;
1025 SubsDestReg = AArch64::WZR;
1026 IsImm = true;
1027 break;
1028 case AArch64::CBXPri:
1029 SubsOpc = AArch64::SUBSXri;
1030 SubsDestReg = AArch64::XZR;
1031 IsImm = true;
1032 break;
1033 case AArch64::CBWPrr:
1034 SubsOpc = AArch64::SUBSWrr;
1035 SubsDestReg = AArch64::WZR;
1036 IsImm = false;
1037 break;
1038 case AArch64::CBXPrr:
1039 SubsOpc = AArch64::SUBSXrr;
1040 SubsDestReg = AArch64::XZR;
1041 IsImm = false;
1042 break;
1043 }
1044
1045 if (IsImm)
1046 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1047 .addReg(Cond[3].getReg())
1048 .addImm(Cond[4].getImm())
1049 .addImm(0);
1050 else
1051 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1052 .addReg(Cond[3].getReg())
1053 .addReg(Cond[4].getReg());
1054 } break;
1055 case 7: { // cb[b,h]
1056 // We must insert a cmp, that is, a subs, but also zero- or sign-extensions
1057 // that have been folded. For the first operand we codegen an explicit
1058 // extension, for the second operand we fold the extension into cmp.
1059 // 0 1 2 3 4 5 6
1060 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
1061
1062 // We need a new register for the now explicitly extended register
1063 Register Reg = Cond[4].getReg();
1065 unsigned ExtOpc;
1066 unsigned ExtBits;
1067 AArch64_AM::ShiftExtendType ExtendType =
1069 switch (ExtendType) {
1070 default:
1071 llvm_unreachable("Unknown shift-extend for CB instruction");
1072 case AArch64_AM::SXTB:
1073 assert(
1074 Cond[1].getImm() == AArch64::CBBAssertExt &&
1075 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1076 ExtOpc = AArch64::SBFMWri;
1077 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1078 break;
1079 case AArch64_AM::SXTH:
1080 assert(
1081 Cond[1].getImm() == AArch64::CBHAssertExt &&
1082 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1083 ExtOpc = AArch64::SBFMWri;
1084 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1085 break;
1086 case AArch64_AM::UXTB:
1087 assert(
1088 Cond[1].getImm() == AArch64::CBBAssertExt &&
1089 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1090 ExtOpc = AArch64::ANDWri;
1091 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1092 break;
1093 case AArch64_AM::UXTH:
1094 assert(
1095 Cond[1].getImm() == AArch64::CBHAssertExt &&
1096 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1097 ExtOpc = AArch64::ANDWri;
1098 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1099 break;
1100 }
1101
1102 // Build the explicit extension of the first operand
1103 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
1104 MachineInstrBuilder MBBI =
1105 BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
1106 if (ExtOpc != AArch64::ANDWri)
1107 MBBI.addImm(0);
1108 MBBI.addImm(ExtBits);
1109 }
1110
1111 // Now, subs with an extended second operand
1113 AArch64_AM::ShiftExtendType ExtendType =
1115 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1116 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1117 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1118 .addReg(Cond[3].getReg())
1119 .addReg(Reg)
1120 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1121 } // If no extension is needed, just a regular subs
1122 else {
1123 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1124 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1125 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1126 .addReg(Cond[3].getReg())
1127 .addReg(Reg);
1128 }
1129
1130 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1131 } break;
1132 }
1133
1134 unsigned Opc = 0;
1135 const TargetRegisterClass *RC = nullptr;
1136 bool TryFold = false;
1137 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1138 RC = &AArch64::GPR64RegClass;
1139 Opc = AArch64::CSELXr;
1140 TryFold = true;
1141 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1142 RC = &AArch64::GPR32RegClass;
1143 Opc = AArch64::CSELWr;
1144 TryFold = true;
1145 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1146 RC = &AArch64::FPR64RegClass;
1147 Opc = AArch64::FCSELDrrr;
1148 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1149 RC = &AArch64::FPR32RegClass;
1150 Opc = AArch64::FCSELSrrr;
1151 }
1152 assert(RC && "Unsupported regclass");
1153
1154 // Try folding simple instructions into the csel.
1155 if (TryFold) {
1156 unsigned NewReg = 0;
1157 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1158 if (FoldedOpc) {
1159 // The folded opcodes csinc, csinv and csneg apply the operation to
1160 // FalseReg, so we need to invert the condition.
1161 CC = AArch64CC::getInvertedCondCode(CC);
1162 TrueReg = FalseReg;
1163 } else
1164 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1165
1166 // Fold the operation. Leave any dead instructions for DCE to clean up.
1167 if (FoldedOpc) {
1168 FalseReg = NewReg;
1169 Opc = FoldedOpc;
1170 // Extend the live range of NewReg.
1171 MRI.clearKillFlags(NewReg);
1172 }
1173 }
1174
1175 // Pull all virtual registers into the appropriate class.
1176 MRI.constrainRegClass(TrueReg, RC);
1177 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1178 assert(
1179 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1180 FalseReg == AArch64::XZR) &&
1181 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1182 if (FalseReg.isVirtual())
1183 MRI.constrainRegClass(FalseReg, RC);
1184
1185 // Insert the csel.
1186 BuildMI(MBB, I, DL, get(Opc), DstReg)
1187 .addReg(TrueReg)
1188 .addReg(FalseReg)
1189 .addImm(CC);
1190}
1191
1192// Return true if Imm can be loaded into a register by a "cheap" sequence of
1193// instructions. For now, "cheap" means at most two instructions.
1194static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1195 if (BitSize == 32)
1196 return true;
1197
1198 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1199 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1200 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1201 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1202
1203 return Is.size() <= 2;
1204}
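// For example, a 64-bit immediate that expands to a single ORR/MOVZ or to a
// MOVZ+MOVK pair is treated as cheap here, while one needing three or four
// expansion instructions is not.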
1205
1206// Check if a COPY instruction is cheap.
1207static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1208 assert(MI.isCopy() && "Expected COPY instruction");
1209 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1210
1211 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1212 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1213 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1214 if (Reg.isVirtual())
1215 return MRI.getRegClass(Reg);
1216 if (Reg.isPhysical())
1217 return RI.getMinimalPhysRegClass(Reg);
1218 return nullptr;
1219 };
1220 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1221 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1222 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1223 return false;
1224
1225 return MI.isAsCheapAsAMove();
1226}
1227
1228// FIXME: this implementation should be micro-architecture dependent, so a
1229// micro-architecture target hook should be introduced here in future.
1230bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1231 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1232 if (isExynosCheapAsMove(MI))
1233 return true;
1234 return MI.isAsCheapAsAMove();
1235 }
1236
1237 switch (MI.getOpcode()) {
1238 default:
1239 return MI.isAsCheapAsAMove();
1240
1241 case TargetOpcode::COPY:
1242 return isCheapCopy(MI, RI);
1243
1244 case AArch64::ADDWrs:
1245 case AArch64::ADDXrs:
1246 case AArch64::SUBWrs:
1247 case AArch64::SUBXrs:
1248 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1249
1250 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1251 // ORRXri, it is as cheap as MOV.
1252 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1253 case AArch64::MOVi32imm:
1254 return isCheapImmediate(MI, 32);
1255 case AArch64::MOVi64imm:
1256 return isCheapImmediate(MI, 64);
1257 }
1258}
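// For example, on a subtarget with the ALULSLFast feature an instruction like
// "add x0, x1, x2, lsl #3" is considered as cheap as a move, whereas larger
// shift amounts (greater than 4) are not.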
1259
1260bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1261 switch (MI.getOpcode()) {
1262 default:
1263 return false;
1264
1265 case AArch64::ADDWrs:
1266 case AArch64::ADDXrs:
1267 case AArch64::ADDSWrs:
1268 case AArch64::ADDSXrs: {
1269 unsigned Imm = MI.getOperand(3).getImm();
1270 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1271 if (ShiftVal == 0)
1272 return true;
1273 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1274 }
1275
1276 case AArch64::ADDWrx:
1277 case AArch64::ADDXrx:
1278 case AArch64::ADDXrx64:
1279 case AArch64::ADDSWrx:
1280 case AArch64::ADDSXrx:
1281 case AArch64::ADDSXrx64: {
1282 unsigned Imm = MI.getOperand(3).getImm();
1283 switch (AArch64_AM::getArithExtendType(Imm)) {
1284 default:
1285 return false;
1286 case AArch64_AM::UXTB:
1287 case AArch64_AM::UXTH:
1288 case AArch64_AM::UXTW:
1289 case AArch64_AM::UXTX:
1290 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1291 }
1292 }
1293
1294 case AArch64::SUBWrs:
1295 case AArch64::SUBSWrs: {
1296 unsigned Imm = MI.getOperand(3).getImm();
1297 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1298 return ShiftVal == 0 ||
1299 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1300 }
1301
1302 case AArch64::SUBXrs:
1303 case AArch64::SUBSXrs: {
1304 unsigned Imm = MI.getOperand(3).getImm();
1305 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1306 return ShiftVal == 0 ||
1307 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1308 }
1309
1310 case AArch64::SUBWrx:
1311 case AArch64::SUBXrx:
1312 case AArch64::SUBXrx64:
1313 case AArch64::SUBSWrx:
1314 case AArch64::SUBSXrx:
1315 case AArch64::SUBSXrx64: {
1316 unsigned Imm = MI.getOperand(3).getImm();
1317 switch (AArch64_AM::getArithExtendType(Imm)) {
1318 default:
1319 return false;
1320 case AArch64_AM::UXTB:
1321 case AArch64_AM::UXTH:
1322 case AArch64_AM::UXTW:
1323 case AArch64_AM::UXTX:
1324 return AArch64_AM::getArithShiftValue(Imm) == 0;
1325 }
1326 }
1327
1328 case AArch64::LDRBBroW:
1329 case AArch64::LDRBBroX:
1330 case AArch64::LDRBroW:
1331 case AArch64::LDRBroX:
1332 case AArch64::LDRDroW:
1333 case AArch64::LDRDroX:
1334 case AArch64::LDRHHroW:
1335 case AArch64::LDRHHroX:
1336 case AArch64::LDRHroW:
1337 case AArch64::LDRHroX:
1338 case AArch64::LDRQroW:
1339 case AArch64::LDRQroX:
1340 case AArch64::LDRSBWroW:
1341 case AArch64::LDRSBWroX:
1342 case AArch64::LDRSBXroW:
1343 case AArch64::LDRSBXroX:
1344 case AArch64::LDRSHWroW:
1345 case AArch64::LDRSHWroX:
1346 case AArch64::LDRSHXroW:
1347 case AArch64::LDRSHXroX:
1348 case AArch64::LDRSWroW:
1349 case AArch64::LDRSWroX:
1350 case AArch64::LDRSroW:
1351 case AArch64::LDRSroX:
1352 case AArch64::LDRWroW:
1353 case AArch64::LDRWroX:
1354 case AArch64::LDRXroW:
1355 case AArch64::LDRXroX:
1356 case AArch64::PRFMroW:
1357 case AArch64::PRFMroX:
1358 case AArch64::STRBBroW:
1359 case AArch64::STRBBroX:
1360 case AArch64::STRBroW:
1361 case AArch64::STRBroX:
1362 case AArch64::STRDroW:
1363 case AArch64::STRDroX:
1364 case AArch64::STRHHroW:
1365 case AArch64::STRHHroX:
1366 case AArch64::STRHroW:
1367 case AArch64::STRHroX:
1368 case AArch64::STRQroW:
1369 case AArch64::STRQroX:
1370 case AArch64::STRSroW:
1371 case AArch64::STRSroX:
1372 case AArch64::STRWroW:
1373 case AArch64::STRWroX:
1374 case AArch64::STRXroW:
1375 case AArch64::STRXroX: {
1376 unsigned IsSigned = MI.getOperand(3).getImm();
1377 return !IsSigned;
1378 }
1379 }
1380}
1381
1382bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1383 unsigned Opc = MI.getOpcode();
1384 switch (Opc) {
1385 default:
1386 return false;
1387 case AArch64::SEH_StackAlloc:
1388 case AArch64::SEH_SaveFPLR:
1389 case AArch64::SEH_SaveFPLR_X:
1390 case AArch64::SEH_SaveReg:
1391 case AArch64::SEH_SaveReg_X:
1392 case AArch64::SEH_SaveRegP:
1393 case AArch64::SEH_SaveRegP_X:
1394 case AArch64::SEH_SaveFReg:
1395 case AArch64::SEH_SaveFReg_X:
1396 case AArch64::SEH_SaveFRegP:
1397 case AArch64::SEH_SaveFRegP_X:
1398 case AArch64::SEH_SetFP:
1399 case AArch64::SEH_AddFP:
1400 case AArch64::SEH_Nop:
1401 case AArch64::SEH_PrologEnd:
1402 case AArch64::SEH_EpilogStart:
1403 case AArch64::SEH_EpilogEnd:
1404 case AArch64::SEH_PACSignLR:
1405 case AArch64::SEH_SaveAnyRegI:
1406 case AArch64::SEH_SaveAnyRegIP:
1407 case AArch64::SEH_SaveAnyRegQP:
1408 case AArch64::SEH_SaveAnyRegQPX:
1409 case AArch64::SEH_AllocZ:
1410 case AArch64::SEH_SaveZReg:
1411 case AArch64::SEH_SavePReg:
1412 return true;
1413 }
1414}
1415
1416bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1417 Register &SrcReg, Register &DstReg,
1418 unsigned &SubIdx) const {
1419 switch (MI.getOpcode()) {
1420 default:
1421 return false;
1422 case AArch64::SBFMXri: // aka sxtw
1423 case AArch64::UBFMXri: // aka uxtw
1424 // Check for the 32 -> 64 bit extension case, these instructions can do
1425 // much more.
1426 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1427 return false;
1428 // This is a signed or unsigned 32 -> 64 bit extension.
1429 SrcReg = MI.getOperand(1).getReg();
1430 DstReg = MI.getOperand(0).getReg();
1431 SubIdx = AArch64::sub_32;
1432 return true;
1433 }
1434}
1435
1436bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1437 const MachineInstr &MIa, const MachineInstr &MIb) const {
1438 const TargetRegisterInfo *TRI = &getRegisterInfo();
1439 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1440 int64_t OffsetA = 0, OffsetB = 0;
1441 TypeSize WidthA(0, false), WidthB(0, false);
1442 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1443
1444 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1445 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1446
1447 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1448 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1449 return false;
1450
1451 // Retrieve the base, offset from the base and width. Width
1452 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1453 // the bases are identical, and the offset of a lower memory access +
1454 // the width doesn't overlap the offset of a higher memory access,
1455 // then the memory accesses are different.
1456 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1457 // are assumed to have the same scale (vscale).
1458 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1459 WidthA, TRI) &&
1460 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1461 WidthB, TRI)) {
1462 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1463 OffsetAIsScalable == OffsetBIsScalable) {
1464 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1465 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1466 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1467 if (LowWidth.isScalable() == OffsetAIsScalable &&
1468 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1469 return true;
1470 }
1471 }
1472 return false;
1473}
1474
1475bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1476 const MachineBasicBlock *MBB,
1477 const MachineFunction &MF) const {
1478 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1479 return true;
1480
1481 // Do not move an instruction that can be recognized as a branch target.
1482 if (hasBTISemantics(MI))
1483 return true;
1484
1485 switch (MI.getOpcode()) {
1486 case AArch64::HINT:
1487 // CSDB hints are scheduling barriers.
1488 if (MI.getOperand(0).getImm() == 0x14)
1489 return true;
1490 break;
1491 case AArch64::DSB:
1492 case AArch64::ISB:
1493 // DSB and ISB also are scheduling barriers.
1494 return true;
1495 case AArch64::MSRpstatesvcrImm1:
1496 // SMSTART and SMSTOP are also scheduling barriers.
1497 return true;
1498 default:;
1499 }
1500 if (isSEHInstruction(MI))
1501 return true;
1502 auto Next = std::next(MI.getIterator());
1503 return Next != MBB->end() && Next->isCFIInstruction();
1504}
1505
1506/// analyzeCompare - For a comparison instruction, return the source registers
1507/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1508/// Return true if the comparison instruction can be analyzed.
1509bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1510 Register &SrcReg2, int64_t &CmpMask,
1511 int64_t &CmpValue) const {
1512 // The first operand can be a frame index where we'd normally expect a
1513 // register.
1514 // FIXME: Pass subregisters out of analyzeCompare
1515 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1516 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1517 return false;
1518
1519 switch (MI.getOpcode()) {
1520 default:
1521 break;
1522 case AArch64::PTEST_PP:
1523 case AArch64::PTEST_PP_ANY:
1524 case AArch64::PTEST_PP_FIRST:
1525 SrcReg = MI.getOperand(0).getReg();
1526 SrcReg2 = MI.getOperand(1).getReg();
1527 if (MI.getOperand(2).getSubReg())
1528 return false;
1529
1530 // Not sure about the mask and value for now...
1531 CmpMask = ~0;
1532 CmpValue = 0;
1533 return true;
1534 case AArch64::SUBSWrr:
1535 case AArch64::SUBSWrs:
1536 case AArch64::SUBSWrx:
1537 case AArch64::SUBSXrr:
1538 case AArch64::SUBSXrs:
1539 case AArch64::SUBSXrx:
1540 case AArch64::ADDSWrr:
1541 case AArch64::ADDSWrs:
1542 case AArch64::ADDSWrx:
1543 case AArch64::ADDSXrr:
1544 case AArch64::ADDSXrs:
1545 case AArch64::ADDSXrx:
1546 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1547 SrcReg = MI.getOperand(1).getReg();
1548 SrcReg2 = MI.getOperand(2).getReg();
1549
1550 // FIXME: Pass subregisters out of analyzeCompare
1551 if (MI.getOperand(2).getSubReg())
1552 return false;
1553
1554 CmpMask = ~0;
1555 CmpValue = 0;
1556 return true;
1557 case AArch64::SUBSWri:
1558 case AArch64::ADDSWri:
1559 case AArch64::SUBSXri:
1560 case AArch64::ADDSXri:
1561 SrcReg = MI.getOperand(1).getReg();
1562 SrcReg2 = 0;
1563 CmpMask = ~0;
1564 CmpValue = MI.getOperand(2).getImm();
1565 return true;
1566 case AArch64::ANDSWri:
1567 case AArch64::ANDSXri:
1568 // ANDS does not use the same encoding scheme as the other xxxS
1569 // instructions.
1570 SrcReg = MI.getOperand(1).getReg();
1571 SrcReg2 = 0;
1572 CmpMask = ~0;
1573 CmpValue = AArch64_AM::decodeLogicalImmediate(
1574 MI.getOperand(2).getImm(),
1575 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1576 return true;
1577 }
1578
1579 return false;
1580}
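// Illustrative example: "$wzr = SUBSWri $w0, 42, 0" (i.e. "cmp w0, #42") is
// reported as SrcReg = $w0, SrcReg2 = 0, CmpMask = ~0, CmpValue = 42.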
1581
1582static bool UpdateOperandRegClass(MachineInstr &Instr) {
1583 MachineBasicBlock *MBB = Instr.getParent();
1584 assert(MBB && "Can't get MachineBasicBlock here");
1585 MachineFunction *MF = MBB->getParent();
1586 assert(MF && "Can't get MachineFunction here");
1587 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1588 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1589 MachineRegisterInfo *MRI = &MF->getRegInfo();
1590
1591 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1592 ++OpIdx) {
1593 MachineOperand &MO = Instr.getOperand(OpIdx);
1594 const TargetRegisterClass *OpRegCstraints =
1595 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1596
1597 // If there's no constraint, there's nothing to do.
1598 if (!OpRegCstraints)
1599 continue;
1600 // If the operand is a frame index, there's nothing to do here.
1601 // A frame index operand will resolve correctly during PEI.
1602 if (MO.isFI())
1603 continue;
1604
1605 assert(MO.isReg() &&
1606 "Operand has register constraints without being a register!");
1607
1608 Register Reg = MO.getReg();
1609 if (Reg.isPhysical()) {
1610 if (!OpRegCstraints->contains(Reg))
1611 return false;
1612 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1613 !MRI->constrainRegClass(Reg, OpRegCstraints))
1614 return false;
1615 }
1616
1617 return true;
1618}
1619
1620/// Return the opcode that does not set flags when possible - otherwise
1621/// return the original opcode. The caller is responsible to do the actual
1622/// substitution and legality checking.
1623unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1624 // Don't convert all compare instructions, because for some the zero register
1625 // encoding becomes the sp register.
1626 bool MIDefinesZeroReg = false;
1627 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1628 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1629 MIDefinesZeroReg = true;
1630
1631 switch (MI.getOpcode()) {
1632 default:
1633 return MI.getOpcode();
1634 case AArch64::ADDSWrr:
1635 return AArch64::ADDWrr;
1636 case AArch64::ADDSWri:
1637 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1638 case AArch64::ADDSWrs:
1639 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1640 case AArch64::ADDSWrx:
1641 return AArch64::ADDWrx;
1642 case AArch64::ADDSXrr:
1643 return AArch64::ADDXrr;
1644 case AArch64::ADDSXri:
1645 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1646 case AArch64::ADDSXrs:
1647 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1648 case AArch64::ADDSXrx:
1649 return AArch64::ADDXrx;
1650 case AArch64::SUBSWrr:
1651 return AArch64::SUBWrr;
1652 case AArch64::SUBSWri:
1653 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1654 case AArch64::SUBSWrs:
1655 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1656 case AArch64::SUBSWrx:
1657 return AArch64::SUBWrx;
1658 case AArch64::SUBSXrr:
1659 return AArch64::SUBXrr;
1660 case AArch64::SUBSXri:
1661 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1662 case AArch64::SUBSXrs:
1663 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1664 case AArch64::SUBSXrx:
1665 return AArch64::SUBXrx;
1666 }
1667}
1668
1669enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1670
1671/// True when condition flags are accessed (either by writing or reading)
1672/// on the instruction trace starting at From and ending at To.
1673///
1674/// Note: If From and To are from different blocks it's assumed CC are accessed
1675/// on the path.
1676static bool areCFlagsAccessedBetweenInstrs(
1677 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1678 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1679 // Early exit if To is at the beginning of the BB.
1680 if (To == To->getParent()->begin())
1681 return true;
1682
1683 // Check whether the instructions are in the same basic block
1684 // If not, assume the condition flags might get modified somewhere.
1685 if (To->getParent() != From->getParent())
1686 return true;
1687
1688 // From must be above To.
1689 assert(std::any_of(
1690 ++To.getReverse(), To->getParent()->rend(),
1691 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1692
1693 // We iterate backward starting at \p To until we hit \p From.
1694 for (const MachineInstr &Instr :
1695 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1696 if (((AccessToCheck & AK_Write) &&
1697 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1698 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1699 return true;
1700 }
1701 return false;
1702}
1703
1704std::optional<unsigned>
1705AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1706 MachineInstr *Pred,
1707 const MachineRegisterInfo *MRI) const {
1708 unsigned MaskOpcode = Mask->getOpcode();
1709 unsigned PredOpcode = Pred->getOpcode();
1710 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1711 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1712
1713 if (PredIsWhileLike) {
1714 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1715 // instruction and the condition is "any" since WHILEcc does an implicit
1716 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1717 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1718 return PredOpcode;
1719
1720 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1721 // redundant since WHILE performs an implicit PTEST with an all active
1722 // mask.
1723 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1724 getElementSizeForOpcode(MaskOpcode) ==
1725 getElementSizeForOpcode(PredOpcode))
1726 return PredOpcode;
1727
1728 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1729 // WHILEcc performs an implicit PTEST with an all active mask, setting
1730 // the N flag as the PTEST_FIRST would.
1731 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1732 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1733 return PredOpcode;
1734
1735 return {};
1736 }
1737
1738 if (PredIsPTestLike) {
1739 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1740 // instruction that sets the flags as PTEST would and the condition is
1741 // "any" since PG is always a subset of the governing predicate of the
1742 // ptest-like instruction.
1743 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1744 return PredOpcode;
1745
1746 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1747
1748 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1749 // to look through a copy and try again. This is because some instructions
1750 // take a predicate whose register class is a subset of its result class.
1751 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1752 PTestLikeMask->getOperand(1).getReg().isVirtual())
1753 PTestLikeMask =
1754 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1755
1756 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if
1757 // the element size matches and either the PTEST_LIKE instruction uses
1758 // the same all active mask or the condition is "any".
1759 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1760 getElementSizeForOpcode(MaskOpcode) ==
1761 getElementSizeForOpcode(PredOpcode)) {
1762 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1763 return PredOpcode;
1764 }
1765
1766 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1767 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1768 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1769 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1770 // performed by the compare could consider fewer lanes for these element
1771 // sizes.
1772 //
1773 // For example, consider
1774 //
1775 // ptrue p0.b ; P0=1111-1111-1111-1111
1776 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1777 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1778 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1779 // ; ^ last active
1780 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1781 // ; ^ last active
1782 //
1783 // where the compare generates a canonical all active 32-bit predicate
1784 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1785 // active flag, whereas the PTEST instruction with the same mask doesn't.
1786 // For PTEST_ANY this doesn't apply as the flags in this case would be
1787 // identical regardless of element size.
1788 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1789 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1790 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1791 return PredOpcode;
1792
1793 return {};
1794 }
1795
1796 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1797 // opcode so the PTEST becomes redundant.
1798 switch (PredOpcode) {
1799 case AArch64::AND_PPzPP:
1800 case AArch64::BIC_PPzPP:
1801 case AArch64::EOR_PPzPP:
1802 case AArch64::NAND_PPzPP:
1803 case AArch64::NOR_PPzPP:
1804 case AArch64::ORN_PPzPP:
1805 case AArch64::ORR_PPzPP:
1806 case AArch64::BRKA_PPzP:
1807 case AArch64::BRKPA_PPzPP:
1808 case AArch64::BRKB_PPzP:
1809 case AArch64::BRKPB_PPzPP:
1810 case AArch64::RDFFR_PPz: {
1811     // Check to see if our mask is the same. If not, the resulting flag bits
1812     // may be different and we can't remove the ptest.
1813 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1814 if (Mask != PredMask)
1815 return {};
1816 break;
1817 }
1818 case AArch64::BRKN_PPzP: {
1819 // BRKN uses an all active implicit mask to set flags unlike the other
1820 // flag-setting instructions.
1821 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1822 if ((MaskOpcode != AArch64::PTRUE_B) ||
1823 (Mask->getOperand(1).getImm() != 31))
1824 return {};
1825 break;
1826 }
1827 case AArch64::PTRUE_B:
1828 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1829 break;
1830 default:
1831 // Bail out if we don't recognize the input
1832 return {};
1833 }
1834
1835 return convertToFlagSettingOpc(PredOpcode);
1836}
1837
1838/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1839/// operation which could set the flags in an identical manner
1840bool AArch64InstrInfo::optimizePTestInstr(
1841 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1842 const MachineRegisterInfo *MRI) const {
1843 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1844 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1845
1846 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1847 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1848 // before the branch to extract each subregister.
1849 auto Op = Pred->getOperand(1);
1850 if (Op.isReg() && Op.getReg().isVirtual() &&
1851 Op.getSubReg() == AArch64::psub0)
1852 Pred = MRI->getUniqueVRegDef(Op.getReg());
1853 }
1854
1855 unsigned PredOpcode = Pred->getOpcode();
1856 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1857 if (!NewOp)
1858 return false;
1859
1860 const TargetRegisterInfo *TRI = &getRegisterInfo();
1861
1862 // If another instruction between Pred and PTest accesses flags, don't remove
1863 // the ptest or update the earlier instruction to modify them.
1864 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1865 return false;
1866
1867 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1868 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1869 // operand to be replaced with an equivalent instruction that also sets the
1870 // flags.
1871 PTest->eraseFromParent();
1872 if (*NewOp != PredOpcode) {
1873 Pred->setDesc(get(*NewOp));
1874 bool succeeded = UpdateOperandRegClass(*Pred);
1875 (void)succeeded;
1876 assert(succeeded && "Operands have incompatible register classes!");
1877 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1878 }
1879
1880 // Ensure that the flags def is live.
1881 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1882 unsigned i = 0, e = Pred->getNumOperands();
1883 for (; i != e; ++i) {
1884 MachineOperand &MO = Pred->getOperand(i);
1885 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1886 MO.setIsDead(false);
1887 break;
1888 }
1889 }
1890 }
1891 return true;
1892}
1893
1894/// Try to optimize a compare instruction. A compare instruction is an
1895/// instruction which produces AArch64::NZCV. It is a true compare
1896/// instruction
1897/// when there are no uses of its destination register.
1898///
1899/// The following steps are tried in order:
1900/// 1. Convert CmpInstr into a non-flag-setting version if its NZCV def is
1901///    unused.
1902/// 2. Remove CmpInstr if an earlier instruction already produces the needed
1903///    condition code or can be converted into an instruction that does.
1904/// Only comparisons with zero are supported.
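/// For example (illustrative): a "subs w8, w9, #1" whose NZCV result is never
/// read is rewritten to "sub w8, w9, #1" (step 1), while a "cmp w8, #0" whose
/// source is defined by an instruction with a flag-setting form may be removed
/// entirely (step 2).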
1905 bool AArch64InstrInfo::optimizeCompareInstr(
1906     MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1907 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1908 assert(CmpInstr.getParent());
1909 assert(MRI);
1910
1911 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1912 int DeadNZCVIdx =
1913 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1914 if (DeadNZCVIdx != -1) {
1915 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1916 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1917 CmpInstr.eraseFromParent();
1918 return true;
1919 }
1920 unsigned Opc = CmpInstr.getOpcode();
1921 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1922 if (NewOpc == Opc)
1923 return false;
1924 const MCInstrDesc &MCID = get(NewOpc);
1925 CmpInstr.setDesc(MCID);
1926 CmpInstr.removeOperand(DeadNZCVIdx);
1927 bool succeeded = UpdateOperandRegClass(CmpInstr);
1928 (void)succeeded;
1929 assert(succeeded && "Some operands reg class are incompatible!");
1930 return true;
1931 }
1932
1933 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1934 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1935 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1936 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1937
1938 if (SrcReg2 != 0)
1939 return false;
1940
1941   // CmpInstr is a compare instruction if its destination register is not used.
1942 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1943 return false;
1944
1945 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1946 return true;
1947 return (CmpValue == 0 || CmpValue == 1) &&
1948 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1949}
1950
1951/// Get the opcode of the S (flag-setting) version of Instr.
1952/// If Instr is already the S version, its opcode is returned.
1953/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1954/// version or we are not interested in it.
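/// For example, an ADDWrr maps to ADDSWrr, while an instruction with no
/// flag-setting form (e.g. MADDWrrr) yields AArch64::INSTRUCTION_LIST_END.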
1955static unsigned sForm(MachineInstr &Instr) {
1956 switch (Instr.getOpcode()) {
1957 default:
1958 return AArch64::INSTRUCTION_LIST_END;
1959
1960 case AArch64::ADDSWrr:
1961 case AArch64::ADDSWri:
1962 case AArch64::ADDSXrr:
1963 case AArch64::ADDSXri:
1964 case AArch64::ADDSWrx:
1965 case AArch64::ADDSXrx:
1966 case AArch64::SUBSWrr:
1967 case AArch64::SUBSWri:
1968 case AArch64::SUBSWrx:
1969 case AArch64::SUBSXrr:
1970 case AArch64::SUBSXri:
1971 case AArch64::SUBSXrx:
1972 case AArch64::ANDSWri:
1973 case AArch64::ANDSWrr:
1974 case AArch64::ANDSWrs:
1975 case AArch64::ANDSXri:
1976 case AArch64::ANDSXrr:
1977 case AArch64::ANDSXrs:
1978 case AArch64::BICSWrr:
1979 case AArch64::BICSXrr:
1980 case AArch64::BICSWrs:
1981 case AArch64::BICSXrs:
1982 return Instr.getOpcode();
1983
1984 case AArch64::ADDWrr:
1985 return AArch64::ADDSWrr;
1986 case AArch64::ADDWri:
1987 return AArch64::ADDSWri;
1988 case AArch64::ADDXrr:
1989 return AArch64::ADDSXrr;
1990 case AArch64::ADDXri:
1991 return AArch64::ADDSXri;
1992 case AArch64::ADDWrx:
1993 return AArch64::ADDSWrx;
1994 case AArch64::ADDXrx:
1995 return AArch64::ADDSXrx;
1996 case AArch64::ADCWr:
1997 return AArch64::ADCSWr;
1998 case AArch64::ADCXr:
1999 return AArch64::ADCSXr;
2000 case AArch64::SUBWrr:
2001 return AArch64::SUBSWrr;
2002 case AArch64::SUBWri:
2003 return AArch64::SUBSWri;
2004 case AArch64::SUBXrr:
2005 return AArch64::SUBSXrr;
2006 case AArch64::SUBXri:
2007 return AArch64::SUBSXri;
2008 case AArch64::SUBWrx:
2009 return AArch64::SUBSWrx;
2010 case AArch64::SUBXrx:
2011 return AArch64::SUBSXrx;
2012 case AArch64::SBCWr:
2013 return AArch64::SBCSWr;
2014 case AArch64::SBCXr:
2015 return AArch64::SBCSXr;
2016 case AArch64::ANDWri:
2017 return AArch64::ANDSWri;
2018 case AArch64::ANDXri:
2019 return AArch64::ANDSXri;
2020 case AArch64::ANDWrr:
2021 return AArch64::ANDSWrr;
2022 case AArch64::ANDWrs:
2023 return AArch64::ANDSWrs;
2024 case AArch64::ANDXrr:
2025 return AArch64::ANDSXrr;
2026 case AArch64::ANDXrs:
2027 return AArch64::ANDSXrs;
2028 case AArch64::BICWrr:
2029 return AArch64::BICSWrr;
2030 case AArch64::BICXrr:
2031 return AArch64::BICSXrr;
2032 case AArch64::BICWrs:
2033 return AArch64::BICSWrs;
2034 case AArch64::BICXrs:
2035 return AArch64::BICSXrs;
2036 }
2037}
2038
2039/// Check if AArch64::NZCV should be alive in successors of MBB.
2040 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
2041   for (auto *BB : MBB->successors())
2042 if (BB->isLiveIn(AArch64::NZCV))
2043 return true;
2044 return false;
2045}
2046
2047/// \returns The condition code operand index for \p Instr if it is a branch
2048/// or select and -1 otherwise.
2049static int
2050 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
2051   switch (Instr.getOpcode()) {
2052 default:
2053 return -1;
2054
2055 case AArch64::Bcc: {
2056 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2057 assert(Idx >= 2);
2058 return Idx - 2;
2059 }
2060
2061 case AArch64::CSINVWr:
2062 case AArch64::CSINVXr:
2063 case AArch64::CSINCWr:
2064 case AArch64::CSINCXr:
2065 case AArch64::CSELWr:
2066 case AArch64::CSELXr:
2067 case AArch64::CSNEGWr:
2068 case AArch64::CSNEGXr:
2069 case AArch64::FCSELSrrr:
2070 case AArch64::FCSELDrrr: {
2071 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2072 assert(Idx >= 1);
2073 return Idx - 1;
2074 }
2075 }
2076}
2077
2078/// Find a condition code used by the instruction.
2079/// Returns AArch64CC::Invalid if either the instruction does not use condition
2080/// codes or we don't optimize CmpInstr in the presence of such instructions.
2081 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
2082   int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
2083   return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2084                           Instr.getOperand(CCIdx).getImm())
2085                     : AArch64CC::Invalid;
2086}
2087
2090 UsedNZCV UsedFlags;
2091 switch (CC) {
2092 default:
2093 break;
2094
2095 case AArch64CC::EQ: // Z set
2096 case AArch64CC::NE: // Z clear
2097 UsedFlags.Z = true;
2098 break;
2099
2100 case AArch64CC::HI: // Z clear and C set
2101 case AArch64CC::LS: // Z set or C clear
2102 UsedFlags.Z = true;
2103 [[fallthrough]];
2104 case AArch64CC::HS: // C set
2105 case AArch64CC::LO: // C clear
2106 UsedFlags.C = true;
2107 break;
2108
2109 case AArch64CC::MI: // N set
2110 case AArch64CC::PL: // N clear
2111 UsedFlags.N = true;
2112 break;
2113
2114 case AArch64CC::VS: // V set
2115 case AArch64CC::VC: // V clear
2116 UsedFlags.V = true;
2117 break;
2118
2119 case AArch64CC::GT: // Z clear, N and V the same
2120 case AArch64CC::LE: // Z set, N and V differ
2121 UsedFlags.Z = true;
2122 [[fallthrough]];
2123 case AArch64CC::GE: // N and V the same
2124 case AArch64CC::LT: // N and V differ
2125 UsedFlags.N = true;
2126 UsedFlags.V = true;
2127 break;
2128 }
2129 return UsedFlags;
2130}
2131
2132/// \returns Condition flags used after \p CmpInstr in its MachineBB if the
2133/// NZCV flags are not alive in successors of the block containing both
2134/// \p CmpInstr and \p MI; otherwise \returns std::nullopt.
2135///
2136/// Collects the instructions using those flags in \p CCUseInstrs if provided.
2137std::optional<UsedNZCV>
2138 llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
2139                        const TargetRegisterInfo &TRI,
2140 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2141 MachineBasicBlock *CmpParent = CmpInstr.getParent();
2142 if (MI.getParent() != CmpParent)
2143 return std::nullopt;
2144
2145 if (areCFlagsAliveInSuccessors(CmpParent))
2146 return std::nullopt;
2147
2148 UsedNZCV NZCVUsedAfterCmp;
2149   for (MachineInstr &Instr : instructionsWithoutDebug(
2150            std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2151 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2152     AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
2153     if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2154 return std::nullopt;
2155 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2156 if (CCUseInstrs)
2157 CCUseInstrs->push_back(&Instr);
2158 }
2159 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2160 break;
2161 }
2162 return NZCVUsedAfterCmp;
2163}
2164
2165static bool isADDSRegImm(unsigned Opcode) {
2166 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2167}
2168
2169static bool isSUBSRegImm(unsigned Opcode) {
2170 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2171}
2172
2173 static bool isANDOpcode(MachineInstr &MI) {
2174   unsigned Opc = sForm(MI);
2175 switch (Opc) {
2176 case AArch64::ANDSWri:
2177 case AArch64::ANDSWrr:
2178 case AArch64::ANDSWrs:
2179 case AArch64::ANDSXri:
2180 case AArch64::ANDSXrr:
2181 case AArch64::ANDSXrs:
2182 case AArch64::BICSWrr:
2183 case AArch64::BICSXrr:
2184 case AArch64::BICSWrs:
2185 case AArch64::BICSXrs:
2186 return true;
2187 default:
2188 return false;
2189 }
2190}
2191
2192/// Check if CmpInstr can be substituted by MI.
2193///
2194/// CmpInstr can be substituted:
2195/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2196/// - and, MI and CmpInstr are from the same MachineBB
2197/// - and, condition flags are not alive in successors of the CmpInstr parent
2198/// - and, if MI opcode is the S form there must be no defs of flags between
2199/// MI and CmpInstr
2200/// or if MI opcode is not the S form there must be neither defs of flags
2201/// nor uses of flags between MI and CmpInstr.
2202/// - and, if the C/V flags are not used after CmpInstr
2203///    or if the N flag is used but MI produces a poison value when signed
2204///    overflow occurs.
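/// Illustrative example (editorial) of the C-flag restriction: in
/// \code
///   sub w8, w9, w10
///   cmp w8, #0
///   b.hs <bb>
/// \endcode
/// B.HS reads C; "cmp w8, #0" always sets C (subtracting zero never borrows),
/// while "subs w8, w9, w10" sets C from the w9 - w10 borrow, so the compare
/// cannot be replaced here.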
2205 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
2206                                        const TargetRegisterInfo &TRI) {
2207   // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
2208   // subtraction that may or may not set flags.
2209 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2210
2211 const unsigned CmpOpcode = CmpInstr.getOpcode();
2212 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2213 return false;
2214
2215 assert((CmpInstr.getOperand(2).isImm() &&
2216 CmpInstr.getOperand(2).getImm() == 0) &&
2217 "Caller guarantees that CmpInstr compares with constant 0");
2218
2219 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2220 if (!NZVCUsed || NZVCUsed->C)
2221 return false;
2222
2223 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
2224 // '%vreg = add ...' or '%vreg = sub ...'.
2225 // Condition flag V is used to indicate signed overflow.
2226 // 1) MI and CmpInstr set N and V to the same value.
2227 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
2228 // signed overflow occurs, so CmpInstr could still be simplified away.
2229 // Note that Ands and Bics instructions always clear the V flag.
2230 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2231 return false;
2232
2233 AccessKind AccessToCheck = AK_Write;
2234 if (sForm(MI) != MI.getOpcode())
2235 AccessToCheck = AK_All;
2236 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2237}
2238
2239/// Substitute an instruction comparing to zero with another instruction
2240/// which produces needed condition flags.
2241///
2242/// Return true on success.
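/// For example (illustrative):
/// \code
///   sub  w8, w9, w10
///   cmp  w8, #0
///   b.eq <bb>
/// \endcode
/// becomes
/// \code
///   subs w8, w9, w10
///   b.eq <bb>
/// \endcode
/// since SUBS sets the Z and N flags exactly as the compare with zero would.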
2243bool AArch64InstrInfo::substituteCmpToZero(
2244 MachineInstr &CmpInstr, unsigned SrcReg,
2245 const MachineRegisterInfo &MRI) const {
2246 // Get the unique definition of SrcReg.
2247 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2248 if (!MI)
2249 return false;
2250
2251 const TargetRegisterInfo &TRI = getRegisterInfo();
2252
2253 unsigned NewOpc = sForm(*MI);
2254 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2255 return false;
2256
2257 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2258 return false;
2259
2260 // Update the instruction to set NZCV.
2261 MI->setDesc(get(NewOpc));
2262 CmpInstr.eraseFromParent();
2263   bool succeeded = UpdateOperandRegClass(*MI);
2264   (void)succeeded;
2265 assert(succeeded && "Some operands reg class are incompatible!");
2266 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2267 return true;
2268}
2269
2270/// \returns True if \p CmpInstr can be removed.
2271///
2272/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2273/// codes used in \p CCUseInstrs must be inverted.
2274 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
2275                                  int CmpValue, const TargetRegisterInfo &TRI,
2276                                  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
2277                                  bool &IsInvertCC) {
2278 assert((CmpValue == 0 || CmpValue == 1) &&
2279 "Only comparisons to 0 or 1 considered for removal!");
2280
2281 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2282 unsigned MIOpc = MI.getOpcode();
2283 if (MIOpc == AArch64::CSINCWr) {
2284 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2285 MI.getOperand(2).getReg() != AArch64::WZR)
2286 return false;
2287 } else if (MIOpc == AArch64::CSINCXr) {
2288 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2289 MI.getOperand(2).getReg() != AArch64::XZR)
2290 return false;
2291 } else {
2292 return false;
2293 }
2294   AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2295   if (MICC == AArch64CC::Invalid)
2296 return false;
2297
2298 // NZCV needs to be defined
2299 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2300 return false;
2301
2302 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2303 const unsigned CmpOpcode = CmpInstr.getOpcode();
2304 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2305 if (CmpValue && !IsSubsRegImm)
2306 return false;
2307 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2308 return false;
2309
2310 // MI conditions allowed: eq, ne, mi, pl
2311 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2312 if (MIUsedNZCV.C || MIUsedNZCV.V)
2313 return false;
2314
2315 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2316 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2317   // Condition flags are not used in the successors of CmpInstr's basic block,
2318   // and only the Z or N flags are allowed to be used after CmpInstr within it.
2319 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2320 return false;
2321 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2322 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2323 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2324 return false;
2325 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2326 if (MIUsedNZCV.N && !CmpValue)
2327 return false;
2328
2329 // There must be no defs of flags between MI and CmpInstr
2330 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2331 return false;
2332
2333 // Condition code is inverted in the following cases:
2334 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2335 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2336 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2337 (!CmpValue && MICC == AArch64CC::NE);
2338 return true;
2339}
2340
2341/// Remove comparison in csinc-cmp sequence
2342///
2343/// Examples:
2344/// 1. \code
2345/// csinc w9, wzr, wzr, ne
2346/// cmp w9, #0
2347/// b.eq
2348/// \endcode
2349/// to
2350/// \code
2351/// csinc w9, wzr, wzr, ne
2352/// b.ne
2353/// \endcode
2354///
2355/// 2. \code
2356/// csinc x2, xzr, xzr, mi
2357/// cmp x2, #1
2358/// b.pl
2359/// \endcode
2360/// to
2361/// \code
2362/// csinc x2, xzr, xzr, mi
2363/// b.pl
2364/// \endcode
2365///
2366/// \param CmpInstr comparison instruction
2367/// \return True when comparison removed
2368bool AArch64InstrInfo::removeCmpToZeroOrOne(
2369 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2370 const MachineRegisterInfo &MRI) const {
2371 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2372 if (!MI)
2373 return false;
2374 const TargetRegisterInfo &TRI = getRegisterInfo();
2375 SmallVector<MachineInstr *, 4> CCUseInstrs;
2376 bool IsInvertCC = false;
2377 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2378 IsInvertCC))
2379 return false;
2380 // Make transformation
2381 CmpInstr.eraseFromParent();
2382 if (IsInvertCC) {
2383 // Invert condition codes in CmpInstr CC users
2384 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2385 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2386 assert(Idx >= 0 && "Unexpected instruction using CC.");
2387 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2388       AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2389           static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2390 CCOperand.setImm(CCUse);
2391 }
2392 }
2393 return true;
2394}
2395
2396bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2397 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2398 MI.getOpcode() != AArch64::CATCHRET)
2399 return false;
2400
2401 MachineBasicBlock &MBB = *MI.getParent();
2402 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2403 auto TRI = Subtarget.getRegisterInfo();
2404 DebugLoc DL = MI.getDebugLoc();
2405
2406 if (MI.getOpcode() == AArch64::CATCHRET) {
2407 // Skip to the first instruction before the epilog.
2408 const TargetInstrInfo *TII =
2409         MBB.getParent()->getSubtarget().getInstrInfo();
2410     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2411     MachineBasicBlock::iterator MBBI(MI);
2412     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2413 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2414 FirstEpilogSEH != MBB.begin())
2415 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2416 if (FirstEpilogSEH != MBB.begin())
2417 FirstEpilogSEH = std::next(FirstEpilogSEH);
2418 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2419 .addReg(AArch64::X0, RegState::Define)
2420 .addMBB(TargetMBB);
2421 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2422 .addReg(AArch64::X0, RegState::Define)
2423 .addReg(AArch64::X0)
2424 .addMBB(TargetMBB)
2425 .addImm(0);
2426 TargetMBB->setMachineBlockAddressTaken();
2427 return true;
2428 }
2429
2430 Register Reg = MI.getOperand(0).getReg();
2431   const Module &M = *MBB.getParent()->getFunction().getParent();
2432   if (M.getStackProtectorGuard() == "sysreg") {
2433 const AArch64SysReg::SysReg *SrcReg =
2434 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2435 if (!SrcReg)
2436 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2437
2438 // mrs xN, sysreg
2439 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2440         .addDef(Reg)
2441         .addImm(SrcReg->Encoding);
2442 int Offset = M.getStackProtectorGuardOffset();
2443 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2444 // ldr xN, [xN, #offset]
2445 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2446 .addDef(Reg)
2447           .addUse(Reg, RegState::Kill)
2448           .addImm(Offset / 8);
2449 } else if (Offset >= -256 && Offset <= 255) {
2450 // ldur xN, [xN, #offset]
2451 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2452 .addDef(Reg)
2453           .addUse(Reg, RegState::Kill)
2454           .addImm(Offset);
2455 } else if (Offset >= -4095 && Offset <= 4095) {
2456 if (Offset > 0) {
2457 // add xN, xN, #offset
2458 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2459 .addDef(Reg)
2460             .addUse(Reg, RegState::Kill)
2461             .addImm(Offset)
2462 .addImm(0);
2463 } else {
2464 // sub xN, xN, #offset
2465 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2466 .addDef(Reg)
2467             .addUse(Reg, RegState::Kill)
2468             .addImm(-Offset)
2469 .addImm(0);
2470 }
2471 // ldr xN, [xN]
2472 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2473 .addDef(Reg)
2474           .addUse(Reg, RegState::Kill)
2475           .addImm(0);
2476 } else {
2477       // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2478       // than 32760 (the maximum scaled LDRXui offset).
2479 // It might be nice to use AArch64::MOVi32imm here, which would get
2480 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2481 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2482 // AArch64FrameLowering might help us find such a scratch register
2483 // though. If we failed to find a scratch register, we could emit a
2484 // stream of add instructions to build up the immediate. Or, we could try
2485 // to insert a AArch64::MOVi32imm before register allocation so that we
2486 // didn't need to scavenge for a scratch register.
2487 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2488 }
2489 MBB.erase(MI);
2490 return true;
2491 }
2492
2493 const GlobalValue *GV =
2494 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2495 const TargetMachine &TM = MBB.getParent()->getTarget();
2496 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2497 const unsigned char MO_NC = AArch64II::MO_NC;
2498
2499 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2500 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2501 .addGlobalAddress(GV, 0, OpFlags);
2502 if (Subtarget.isTargetILP32()) {
2503 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2504 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2505 .addDef(Reg32, RegState::Dead)
2506           .addUse(Reg, RegState::Kill)
2507           .addImm(0)
2508           .addMemOperand(*MI.memoperands_begin())
2509           .addDef(Reg, RegState::Implicit);
2510     } else {
2511 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2512           .addUse(Reg, RegState::Kill)
2513           .addImm(0)
2514 .addMemOperand(*MI.memoperands_begin());
2515 }
2516 } else if (TM.getCodeModel() == CodeModel::Large) {
2517 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2518 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2519 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2520 .addImm(0);
2521 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2522         .addReg(Reg, RegState::Kill)
2523         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2524 .addImm(16);
2525 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2526         .addReg(Reg, RegState::Kill)
2527         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2528 .addImm(32);
2529 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2530         .addReg(Reg, RegState::Kill)
2531         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2532         .addImm(48);
2533 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2534         .addReg(Reg, RegState::Kill)
2535         .addImm(0)
2536 .addMemOperand(*MI.memoperands_begin());
2537 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2538 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2539 .addGlobalAddress(GV, 0, OpFlags);
2540 } else {
2541 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2542 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2543 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2544 if (Subtarget.isTargetILP32()) {
2545 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2546 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2547 .addDef(Reg32, RegState::Dead)
2548           .addUse(Reg, RegState::Kill)
2549           .addGlobalAddress(GV, 0, LoFlags)
2550           .addMemOperand(*MI.memoperands_begin())
2551           .addDef(Reg, RegState::Implicit);
2552     } else {
2553 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2554           .addReg(Reg, RegState::Kill)
2555           .addGlobalAddress(GV, 0, LoFlags)
2556 .addMemOperand(*MI.memoperands_begin());
2557 }
2558 }
2559
2560 MBB.erase(MI);
2561
2562 return true;
2563}
2564
2565// Return true if this instruction simply sets its single destination register
2566// to zero. This is equivalent to a register rename of the zero-register.
2567 static bool isGPRZero(const MachineInstr &MI) {
2568   switch (MI.getOpcode()) {
2569 default:
2570 break;
2571 case AArch64::MOVZWi:
2572 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2573 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2574 assert(MI.getDesc().getNumOperands() == 3 &&
2575 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2576 return true;
2577 }
2578 break;
2579 case AArch64::ANDWri: // and Rd, Rzr, #imm
2580 return MI.getOperand(1).getReg() == AArch64::WZR;
2581 case AArch64::ANDXri:
2582 return MI.getOperand(1).getReg() == AArch64::XZR;
2583 case TargetOpcode::COPY:
2584 return MI.getOperand(1).getReg() == AArch64::WZR;
2585 }
2586 return false;
2587}
2588
2589// Return true if this instruction simply renames a general register without
2590// modifying bits.
2591 static bool isGPRCopy(const MachineInstr &MI) {
2592   switch (MI.getOpcode()) {
2593 default:
2594 break;
2595 case TargetOpcode::COPY: {
2596     // GPR32 copies will be lowered to ORRXrs
2597 Register DstReg = MI.getOperand(0).getReg();
2598 return (AArch64::GPR32RegClass.contains(DstReg) ||
2599 AArch64::GPR64RegClass.contains(DstReg));
2600 }
2601 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2602 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2603 assert(MI.getDesc().getNumOperands() == 4 &&
2604 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2605 return true;
2606 }
2607 break;
2608 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2609 if (MI.getOperand(2).getImm() == 0) {
2610 assert(MI.getDesc().getNumOperands() == 4 &&
2611 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2612 return true;
2613 }
2614 break;
2615 }
2616 return false;
2617}
2618
2619// Return true if this instruction simply renames a floating-point/SIMD
2620// register without modifying bits.
2621 static bool isFPRCopy(const MachineInstr &MI) {
2622   switch (MI.getOpcode()) {
2623 default:
2624 break;
2625 case TargetOpcode::COPY: {
2626 Register DstReg = MI.getOperand(0).getReg();
2627 return AArch64::FPR128RegClass.contains(DstReg);
2628 }
2629 case AArch64::ORRv16i8:
2630 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2631 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2632 "invalid ORRv16i8 operands");
2633 return true;
2634 }
2635 break;
2636 }
2637 return false;
2638}
2639
2640static bool isFrameLoadOpcode(int Opcode) {
2641 switch (Opcode) {
2642 default:
2643 return false;
2644 case AArch64::LDRWui:
2645 case AArch64::LDRXui:
2646 case AArch64::LDRBui:
2647 case AArch64::LDRHui:
2648 case AArch64::LDRSui:
2649 case AArch64::LDRDui:
2650 case AArch64::LDRQui:
2651 case AArch64::LDR_PXI:
2652 return true;
2653 }
2654}
2655
2656 Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2657                                                int &FrameIndex) const {
2658 if (!isFrameLoadOpcode(MI.getOpcode()))
2659 return Register();
2660
2661 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2662 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2663 FrameIndex = MI.getOperand(1).getIndex();
2664 return MI.getOperand(0).getReg();
2665 }
2666 return Register();
2667}
2668
2669static bool isFrameStoreOpcode(int Opcode) {
2670 switch (Opcode) {
2671 default:
2672 return false;
2673 case AArch64::STRWui:
2674 case AArch64::STRXui:
2675 case AArch64::STRBui:
2676 case AArch64::STRHui:
2677 case AArch64::STRSui:
2678 case AArch64::STRDui:
2679 case AArch64::STRQui:
2680 case AArch64::STR_PXI:
2681 return true;
2682 }
2683}
2684
2685 Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2686                                               int &FrameIndex) const {
2687 if (!isFrameStoreOpcode(MI.getOpcode()))
2688 return Register();
2689
2690 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2691 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2692 FrameIndex = MI.getOperand(1).getIndex();
2693 return MI.getOperand(0).getReg();
2694 }
2695 return Register();
2696}
2697
2698 Register AArch64InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
2699                                                     int &FrameIndex) const {
2700 if (!isFrameStoreOpcode(MI.getOpcode()))
2701 return Register();
2702
2703 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2704 return Reg;
2705
2706   SmallVector<const MachineMemOperand *, 1> Accesses;
2707   if (hasStoreToStackSlot(MI, Accesses)) {
2708 if (Accesses.size() > 1)
2709 return Register();
2710
2711 FrameIndex =
2712 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2713 ->getFrameIndex();
2714 return MI.getOperand(0).getReg();
2715 }
2716 return Register();
2717}
2718
2719 Register AArch64InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
2720                                                      int &FrameIndex) const {
2721 if (!isFrameLoadOpcode(MI.getOpcode()))
2722 return Register();
2723
2724 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2725 return Reg;
2726
2727   SmallVector<const MachineMemOperand *, 1> Accesses;
2728   if (hasLoadFromStackSlot(MI, Accesses)) {
2729 if (Accesses.size() > 1)
2730 return Register();
2731
2732 FrameIndex =
2733 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2734 ->getFrameIndex();
2735 return MI.getOperand(0).getReg();
2736 }
2737 return Register();
2738}
2739
2740/// Check all MachineMemOperands for a hint to suppress pairing.
2741 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2742   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2743 return MMO->getFlags() & MOSuppressPair;
2744 });
2745}
2746
2747/// Set a flag on the first MachineMemOperand to suppress pairing.
2748 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2749   if (MI.memoperands_empty())
2750 return;
2751 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2752}
2753
2754/// Check all MachineMemOperands for a hint that the load/store is strided.
2755 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2756   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2757 return MMO->getFlags() & MOStridedAccess;
2758 });
2759}
2760
2761 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2762   switch (Opc) {
2763 default:
2764 return false;
2765 case AArch64::STURSi:
2766 case AArch64::STRSpre:
2767 case AArch64::STURDi:
2768 case AArch64::STRDpre:
2769 case AArch64::STURQi:
2770 case AArch64::STRQpre:
2771 case AArch64::STURBBi:
2772 case AArch64::STURHHi:
2773 case AArch64::STURWi:
2774 case AArch64::STRWpre:
2775 case AArch64::STURXi:
2776 case AArch64::STRXpre:
2777 case AArch64::LDURSi:
2778 case AArch64::LDRSpre:
2779 case AArch64::LDURDi:
2780 case AArch64::LDRDpre:
2781 case AArch64::LDURQi:
2782 case AArch64::LDRQpre:
2783 case AArch64::LDURWi:
2784 case AArch64::LDRWpre:
2785 case AArch64::LDURXi:
2786 case AArch64::LDRXpre:
2787 case AArch64::LDRSWpre:
2788 case AArch64::LDURSWi:
2789 case AArch64::LDURHHi:
2790 case AArch64::LDURBBi:
2791 case AArch64::LDURSBWi:
2792 case AArch64::LDURSHWi:
2793 return true;
2794 }
2795}
2796
2797std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2798 switch (Opc) {
2799 default: return {};
2800 case AArch64::PRFMui: return AArch64::PRFUMi;
2801 case AArch64::LDRXui: return AArch64::LDURXi;
2802 case AArch64::LDRWui: return AArch64::LDURWi;
2803 case AArch64::LDRBui: return AArch64::LDURBi;
2804 case AArch64::LDRHui: return AArch64::LDURHi;
2805 case AArch64::LDRSui: return AArch64::LDURSi;
2806 case AArch64::LDRDui: return AArch64::LDURDi;
2807 case AArch64::LDRQui: return AArch64::LDURQi;
2808 case AArch64::LDRBBui: return AArch64::LDURBBi;
2809 case AArch64::LDRHHui: return AArch64::LDURHHi;
2810 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2811 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2812 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2813 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2814 case AArch64::LDRSWui: return AArch64::LDURSWi;
2815 case AArch64::STRXui: return AArch64::STURXi;
2816 case AArch64::STRWui: return AArch64::STURWi;
2817 case AArch64::STRBui: return AArch64::STURBi;
2818 case AArch64::STRHui: return AArch64::STURHi;
2819 case AArch64::STRSui: return AArch64::STURSi;
2820 case AArch64::STRDui: return AArch64::STURDi;
2821 case AArch64::STRQui: return AArch64::STURQi;
2822 case AArch64::STRBBui: return AArch64::STURBBi;
2823 case AArch64::STRHHui: return AArch64::STURHHi;
2824 }
2825}
2826
2827 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2828   switch (Opc) {
2829 default:
2830 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2831 case AArch64::ADDG:
2832 case AArch64::LDAPURBi:
2833 case AArch64::LDAPURHi:
2834 case AArch64::LDAPURi:
2835 case AArch64::LDAPURSBWi:
2836 case AArch64::LDAPURSBXi:
2837 case AArch64::LDAPURSHWi:
2838 case AArch64::LDAPURSHXi:
2839 case AArch64::LDAPURSWi:
2840 case AArch64::LDAPURXi:
2841 case AArch64::LDR_PPXI:
2842 case AArch64::LDR_PXI:
2843 case AArch64::LDR_ZXI:
2844 case AArch64::LDR_ZZXI:
2845 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2846 case AArch64::LDR_ZZZXI:
2847 case AArch64::LDR_ZZZZXI:
2848 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2849 case AArch64::LDRBBui:
2850 case AArch64::LDRBui:
2851 case AArch64::LDRDui:
2852 case AArch64::LDRHHui:
2853 case AArch64::LDRHui:
2854 case AArch64::LDRQui:
2855 case AArch64::LDRSBWui:
2856 case AArch64::LDRSBXui:
2857 case AArch64::LDRSHWui:
2858 case AArch64::LDRSHXui:
2859 case AArch64::LDRSui:
2860 case AArch64::LDRSWui:
2861 case AArch64::LDRWui:
2862 case AArch64::LDRXui:
2863 case AArch64::LDURBBi:
2864 case AArch64::LDURBi:
2865 case AArch64::LDURDi:
2866 case AArch64::LDURHHi:
2867 case AArch64::LDURHi:
2868 case AArch64::LDURQi:
2869 case AArch64::LDURSBWi:
2870 case AArch64::LDURSBXi:
2871 case AArch64::LDURSHWi:
2872 case AArch64::LDURSHXi:
2873 case AArch64::LDURSi:
2874 case AArch64::LDURSWi:
2875 case AArch64::LDURWi:
2876 case AArch64::LDURXi:
2877 case AArch64::PRFMui:
2878 case AArch64::PRFUMi:
2879 case AArch64::ST2Gi:
2880 case AArch64::STGi:
2881 case AArch64::STLURBi:
2882 case AArch64::STLURHi:
2883 case AArch64::STLURWi:
2884 case AArch64::STLURXi:
2885 case AArch64::StoreSwiftAsyncContext:
2886 case AArch64::STR_PPXI:
2887 case AArch64::STR_PXI:
2888 case AArch64::STR_ZXI:
2889 case AArch64::STR_ZZXI:
2890 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2891 case AArch64::STR_ZZZXI:
2892 case AArch64::STR_ZZZZXI:
2893 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2894 case AArch64::STRBBui:
2895 case AArch64::STRBui:
2896 case AArch64::STRDui:
2897 case AArch64::STRHHui:
2898 case AArch64::STRHui:
2899 case AArch64::STRQui:
2900 case AArch64::STRSui:
2901 case AArch64::STRWui:
2902 case AArch64::STRXui:
2903 case AArch64::STURBBi:
2904 case AArch64::STURBi:
2905 case AArch64::STURDi:
2906 case AArch64::STURHHi:
2907 case AArch64::STURHi:
2908 case AArch64::STURQi:
2909 case AArch64::STURSi:
2910 case AArch64::STURWi:
2911 case AArch64::STURXi:
2912 case AArch64::STZ2Gi:
2913 case AArch64::STZGi:
2914 case AArch64::TAGPstack:
2915 return 2;
2916 case AArch64::LD1B_D_IMM:
2917 case AArch64::LD1B_H_IMM:
2918 case AArch64::LD1B_IMM:
2919 case AArch64::LD1B_S_IMM:
2920 case AArch64::LD1D_IMM:
2921 case AArch64::LD1H_D_IMM:
2922 case AArch64::LD1H_IMM:
2923 case AArch64::LD1H_S_IMM:
2924 case AArch64::LD1RB_D_IMM:
2925 case AArch64::LD1RB_H_IMM:
2926 case AArch64::LD1RB_IMM:
2927 case AArch64::LD1RB_S_IMM:
2928 case AArch64::LD1RD_IMM:
2929 case AArch64::LD1RH_D_IMM:
2930 case AArch64::LD1RH_IMM:
2931 case AArch64::LD1RH_S_IMM:
2932 case AArch64::LD1RSB_D_IMM:
2933 case AArch64::LD1RSB_H_IMM:
2934 case AArch64::LD1RSB_S_IMM:
2935 case AArch64::LD1RSH_D_IMM:
2936 case AArch64::LD1RSH_S_IMM:
2937 case AArch64::LD1RSW_IMM:
2938 case AArch64::LD1RW_D_IMM:
2939 case AArch64::LD1RW_IMM:
2940 case AArch64::LD1SB_D_IMM:
2941 case AArch64::LD1SB_H_IMM:
2942 case AArch64::LD1SB_S_IMM:
2943 case AArch64::LD1SH_D_IMM:
2944 case AArch64::LD1SH_S_IMM:
2945 case AArch64::LD1SW_D_IMM:
2946 case AArch64::LD1W_D_IMM:
2947 case AArch64::LD1W_IMM:
2948 case AArch64::LD2B_IMM:
2949 case AArch64::LD2D_IMM:
2950 case AArch64::LD2H_IMM:
2951 case AArch64::LD2W_IMM:
2952 case AArch64::LD3B_IMM:
2953 case AArch64::LD3D_IMM:
2954 case AArch64::LD3H_IMM:
2955 case AArch64::LD3W_IMM:
2956 case AArch64::LD4B_IMM:
2957 case AArch64::LD4D_IMM:
2958 case AArch64::LD4H_IMM:
2959 case AArch64::LD4W_IMM:
2960 case AArch64::LDG:
2961 case AArch64::LDNF1B_D_IMM:
2962 case AArch64::LDNF1B_H_IMM:
2963 case AArch64::LDNF1B_IMM:
2964 case AArch64::LDNF1B_S_IMM:
2965 case AArch64::LDNF1D_IMM:
2966 case AArch64::LDNF1H_D_IMM:
2967 case AArch64::LDNF1H_IMM:
2968 case AArch64::LDNF1H_S_IMM:
2969 case AArch64::LDNF1SB_D_IMM:
2970 case AArch64::LDNF1SB_H_IMM:
2971 case AArch64::LDNF1SB_S_IMM:
2972 case AArch64::LDNF1SH_D_IMM:
2973 case AArch64::LDNF1SH_S_IMM:
2974 case AArch64::LDNF1SW_D_IMM:
2975 case AArch64::LDNF1W_D_IMM:
2976 case AArch64::LDNF1W_IMM:
2977 case AArch64::LDNPDi:
2978 case AArch64::LDNPQi:
2979 case AArch64::LDNPSi:
2980 case AArch64::LDNPWi:
2981 case AArch64::LDNPXi:
2982 case AArch64::LDNT1B_ZRI:
2983 case AArch64::LDNT1D_ZRI:
2984 case AArch64::LDNT1H_ZRI:
2985 case AArch64::LDNT1W_ZRI:
2986 case AArch64::LDPDi:
2987 case AArch64::LDPQi:
2988 case AArch64::LDPSi:
2989 case AArch64::LDPWi:
2990 case AArch64::LDPXi:
2991 case AArch64::LDRBBpost:
2992 case AArch64::LDRBBpre:
2993 case AArch64::LDRBpost:
2994 case AArch64::LDRBpre:
2995 case AArch64::LDRDpost:
2996 case AArch64::LDRDpre:
2997 case AArch64::LDRHHpost:
2998 case AArch64::LDRHHpre:
2999 case AArch64::LDRHpost:
3000 case AArch64::LDRHpre:
3001 case AArch64::LDRQpost:
3002 case AArch64::LDRQpre:
3003 case AArch64::LDRSpost:
3004 case AArch64::LDRSpre:
3005 case AArch64::LDRWpost:
3006 case AArch64::LDRWpre:
3007 case AArch64::LDRXpost:
3008 case AArch64::LDRXpre:
3009 case AArch64::ST1B_D_IMM:
3010 case AArch64::ST1B_H_IMM:
3011 case AArch64::ST1B_IMM:
3012 case AArch64::ST1B_S_IMM:
3013 case AArch64::ST1D_IMM:
3014 case AArch64::ST1H_D_IMM:
3015 case AArch64::ST1H_IMM:
3016 case AArch64::ST1H_S_IMM:
3017 case AArch64::ST1W_D_IMM:
3018 case AArch64::ST1W_IMM:
3019 case AArch64::ST2B_IMM:
3020 case AArch64::ST2D_IMM:
3021 case AArch64::ST2H_IMM:
3022 case AArch64::ST2W_IMM:
3023 case AArch64::ST3B_IMM:
3024 case AArch64::ST3D_IMM:
3025 case AArch64::ST3H_IMM:
3026 case AArch64::ST3W_IMM:
3027 case AArch64::ST4B_IMM:
3028 case AArch64::ST4D_IMM:
3029 case AArch64::ST4H_IMM:
3030 case AArch64::ST4W_IMM:
3031 case AArch64::STGPi:
3032 case AArch64::STGPreIndex:
3033 case AArch64::STZGPreIndex:
3034 case AArch64::ST2GPreIndex:
3035 case AArch64::STZ2GPreIndex:
3036 case AArch64::STGPostIndex:
3037 case AArch64::STZGPostIndex:
3038 case AArch64::ST2GPostIndex:
3039 case AArch64::STZ2GPostIndex:
3040 case AArch64::STNPDi:
3041 case AArch64::STNPQi:
3042 case AArch64::STNPSi:
3043 case AArch64::STNPWi:
3044 case AArch64::STNPXi:
3045 case AArch64::STNT1B_ZRI:
3046 case AArch64::STNT1D_ZRI:
3047 case AArch64::STNT1H_ZRI:
3048 case AArch64::STNT1W_ZRI:
3049 case AArch64::STPDi:
3050 case AArch64::STPQi:
3051 case AArch64::STPSi:
3052 case AArch64::STPWi:
3053 case AArch64::STPXi:
3054 case AArch64::STRBBpost:
3055 case AArch64::STRBBpre:
3056 case AArch64::STRBpost:
3057 case AArch64::STRBpre:
3058 case AArch64::STRDpost:
3059 case AArch64::STRDpre:
3060 case AArch64::STRHHpost:
3061 case AArch64::STRHHpre:
3062 case AArch64::STRHpost:
3063 case AArch64::STRHpre:
3064 case AArch64::STRQpost:
3065 case AArch64::STRQpre:
3066 case AArch64::STRSpost:
3067 case AArch64::STRSpre:
3068 case AArch64::STRWpost:
3069 case AArch64::STRWpre:
3070 case AArch64::STRXpost:
3071 case AArch64::STRXpre:
3072 return 3;
3073 case AArch64::LDPDpost:
3074 case AArch64::LDPDpre:
3075 case AArch64::LDPQpost:
3076 case AArch64::LDPQpre:
3077 case AArch64::LDPSpost:
3078 case AArch64::LDPSpre:
3079 case AArch64::LDPWpost:
3080 case AArch64::LDPWpre:
3081 case AArch64::LDPXpost:
3082 case AArch64::LDPXpre:
3083 case AArch64::STGPpre:
3084 case AArch64::STGPpost:
3085 case AArch64::STPDpost:
3086 case AArch64::STPDpre:
3087 case AArch64::STPQpost:
3088 case AArch64::STPQpre:
3089 case AArch64::STPSpost:
3090 case AArch64::STPSpre:
3091 case AArch64::STPWpost:
3092 case AArch64::STPWpre:
3093 case AArch64::STPXpost:
3094 case AArch64::STPXpre:
3095 return 4;
3096 }
3097}
3098
3099 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
3100   switch (MI.getOpcode()) {
3101 default:
3102 return false;
3103 // Scaled instructions.
3104 case AArch64::STRSui:
3105 case AArch64::STRDui:
3106 case AArch64::STRQui:
3107 case AArch64::STRXui:
3108 case AArch64::STRWui:
3109 case AArch64::LDRSui:
3110 case AArch64::LDRDui:
3111 case AArch64::LDRQui:
3112 case AArch64::LDRXui:
3113 case AArch64::LDRWui:
3114 case AArch64::LDRSWui:
3115 // Unscaled instructions.
3116 case AArch64::STURSi:
3117 case AArch64::STRSpre:
3118 case AArch64::STURDi:
3119 case AArch64::STRDpre:
3120 case AArch64::STURQi:
3121 case AArch64::STRQpre:
3122 case AArch64::STURWi:
3123 case AArch64::STRWpre:
3124 case AArch64::STURXi:
3125 case AArch64::STRXpre:
3126 case AArch64::LDURSi:
3127 case AArch64::LDRSpre:
3128 case AArch64::LDURDi:
3129 case AArch64::LDRDpre:
3130 case AArch64::LDURQi:
3131 case AArch64::LDRQpre:
3132 case AArch64::LDURWi:
3133 case AArch64::LDRWpre:
3134 case AArch64::LDURXi:
3135 case AArch64::LDRXpre:
3136 case AArch64::LDURSWi:
3137 case AArch64::LDRSWpre:
3138 // SVE instructions.
3139 case AArch64::LDR_ZXI:
3140 case AArch64::STR_ZXI:
3141 return true;
3142 }
3143}
3144
3145 bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
3146   switch (MI.getOpcode()) {
3147 default:
3148 assert((!MI.isCall() || !MI.isReturn()) &&
3149 "Unexpected instruction - was a new tail call opcode introduced?");
3150 return false;
3151 case AArch64::TCRETURNdi:
3152 case AArch64::TCRETURNri:
3153 case AArch64::TCRETURNrix16x17:
3154 case AArch64::TCRETURNrix17:
3155 case AArch64::TCRETURNrinotx16:
3156 case AArch64::TCRETURNriALL:
3157 case AArch64::AUTH_TCRETURN:
3158 case AArch64::AUTH_TCRETURN_BTI:
3159 return true;
3160 }
3161}
3162
3163 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
3164   switch (Opc) {
3165 default:
3166 llvm_unreachable("Opcode has no flag setting equivalent!");
3167 // 32-bit cases:
3168 case AArch64::ADDWri:
3169 return AArch64::ADDSWri;
3170 case AArch64::ADDWrr:
3171 return AArch64::ADDSWrr;
3172 case AArch64::ADDWrs:
3173 return AArch64::ADDSWrs;
3174 case AArch64::ADDWrx:
3175 return AArch64::ADDSWrx;
3176 case AArch64::ANDWri:
3177 return AArch64::ANDSWri;
3178 case AArch64::ANDWrr:
3179 return AArch64::ANDSWrr;
3180 case AArch64::ANDWrs:
3181 return AArch64::ANDSWrs;
3182 case AArch64::BICWrr:
3183 return AArch64::BICSWrr;
3184 case AArch64::BICWrs:
3185 return AArch64::BICSWrs;
3186 case AArch64::SUBWri:
3187 return AArch64::SUBSWri;
3188 case AArch64::SUBWrr:
3189 return AArch64::SUBSWrr;
3190 case AArch64::SUBWrs:
3191 return AArch64::SUBSWrs;
3192 case AArch64::SUBWrx:
3193 return AArch64::SUBSWrx;
3194 // 64-bit cases:
3195 case AArch64::ADDXri:
3196 return AArch64::ADDSXri;
3197 case AArch64::ADDXrr:
3198 return AArch64::ADDSXrr;
3199 case AArch64::ADDXrs:
3200 return AArch64::ADDSXrs;
3201 case AArch64::ADDXrx:
3202 return AArch64::ADDSXrx;
3203 case AArch64::ANDXri:
3204 return AArch64::ANDSXri;
3205 case AArch64::ANDXrr:
3206 return AArch64::ANDSXrr;
3207 case AArch64::ANDXrs:
3208 return AArch64::ANDSXrs;
3209 case AArch64::BICXrr:
3210 return AArch64::BICSXrr;
3211 case AArch64::BICXrs:
3212 return AArch64::BICSXrs;
3213 case AArch64::SUBXri:
3214 return AArch64::SUBSXri;
3215 case AArch64::SUBXrr:
3216 return AArch64::SUBSXrr;
3217 case AArch64::SUBXrs:
3218 return AArch64::SUBSXrs;
3219 case AArch64::SUBXrx:
3220 return AArch64::SUBSXrx;
3221 // SVE instructions:
3222 case AArch64::AND_PPzPP:
3223 return AArch64::ANDS_PPzPP;
3224 case AArch64::BIC_PPzPP:
3225 return AArch64::BICS_PPzPP;
3226 case AArch64::EOR_PPzPP:
3227 return AArch64::EORS_PPzPP;
3228 case AArch64::NAND_PPzPP:
3229 return AArch64::NANDS_PPzPP;
3230 case AArch64::NOR_PPzPP:
3231 return AArch64::NORS_PPzPP;
3232 case AArch64::ORN_PPzPP:
3233 return AArch64::ORNS_PPzPP;
3234 case AArch64::ORR_PPzPP:
3235 return AArch64::ORRS_PPzPP;
3236 case AArch64::BRKA_PPzP:
3237 return AArch64::BRKAS_PPzP;
3238 case AArch64::BRKPA_PPzPP:
3239 return AArch64::BRKPAS_PPzPP;
3240 case AArch64::BRKB_PPzP:
3241 return AArch64::BRKBS_PPzP;
3242 case AArch64::BRKPB_PPzPP:
3243 return AArch64::BRKPBS_PPzPP;
3244 case AArch64::BRKN_PPzP:
3245 return AArch64::BRKNS_PPzP;
3246 case AArch64::RDFFR_PPz:
3247 return AArch64::RDFFRS_PPz;
3248 case AArch64::PTRUE_B:
3249 return AArch64::PTRUES_B;
3250 }
3251}
3252
3253// Is this a candidate for ld/st merging or pairing? For example, we don't
3254// touch volatiles or load/stores that have a hint to avoid pair formation.
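// For example (illustrative), two adjacent accesses such as
//   ldr x0, [sp, #16]
//   ldr x1, [sp, #24]
// are candidates that the load/store optimizer may later combine into
//   ldp x0, x1, [sp, #16]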
3255 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
3256
3257 bool IsPreLdSt = isPreLdSt(MI);
3258
3259 // If this is a volatile load/store, don't mess with it.
3260 if (MI.hasOrderedMemoryRef())
3261 return false;
3262
3263 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3264 // For Pre-inc LD/ST, the operand is shifted by one.
3265 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3266 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3267 "Expected a reg or frame index operand.");
3268
3269 // For Pre-indexed addressing quadword instructions, the third operand is the
3270 // immediate value.
3271 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3272
3273 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3274 return false;
3275
3276 // Can't merge/pair if the instruction modifies the base register.
3277 // e.g., ldr x0, [x0]
3278 // This case will never occur with an FI base.
3279 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3280 // STR<S,D,Q,W,X>pre, it can be merged.
3281 // For example:
3282 // ldr q0, [x11, #32]!
3283 // ldr q1, [x11, #16]
3284 // to
3285 // ldp q0, q1, [x11, #32]!
3286 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3287 Register BaseReg = MI.getOperand(1).getReg();
3288     const TargetRegisterInfo *TRI = &getRegisterInfo();
3289     if (MI.modifiesRegister(BaseReg, TRI))
3290 return false;
3291 }
3292
3293   // Pairing SVE fills/spills is only valid for little-endian targets that
3294   // implement VLS 128 (i.e. a fixed 128-bit SVE vector length).
3295 switch (MI.getOpcode()) {
3296 default:
3297 break;
3298 case AArch64::LDR_ZXI:
3299 case AArch64::STR_ZXI:
3300 if (!Subtarget.isLittleEndian() ||
3301 Subtarget.getSVEVectorSizeInBits() != 128)
3302 return false;
3303 }
3304
3305 // Check if this load/store has a hint to avoid pair formation.
3306 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3307   if (isLdStPairSuppressed(MI))
3308     return false;
3309
3310 // Do not pair any callee-save store/reload instructions in the
3311 // prologue/epilogue if the CFI information encoded the operations as separate
3312 // instructions, as that will cause the size of the actual prologue to mismatch
3313 // with the prologue size recorded in the Windows CFI.
3314 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3315 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3316 MI.getMF()->getFunction().needsUnwindTableEntry();
3317 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3318                       MI.getFlag(MachineInstr::FrameDestroy)))
3319     return false;
3320
3321 // On some CPUs quad load/store pairs are slower than two single load/stores.
3322 if (Subtarget.isPaired128Slow()) {
3323 switch (MI.getOpcode()) {
3324 default:
3325 break;
3326 case AArch64::LDURQi:
3327 case AArch64::STURQi:
3328 case AArch64::LDRQui:
3329 case AArch64::STRQui:
3330 return false;
3331 }
3332 }
3333
3334 return true;
3335}
3336
3337 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3338     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3339     int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3340 const TargetRegisterInfo *TRI) const {
3341 if (!LdSt.mayLoadOrStore())
3342 return false;
3343
3344 const MachineOperand *BaseOp;
3345 TypeSize WidthN(0, false);
3346 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3347 WidthN, TRI))
3348 return false;
3349   // The maximum vscale is 16 under AArch64, so return the maximal extent for
3350   // the vector.
3351 Width = LocationSize::precise(WidthN);
3352 BaseOps.push_back(BaseOp);
3353 return true;
3354}
3355
3356std::optional<ExtAddrMode>
3357 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3358                                           const TargetRegisterInfo *TRI) const {
3359 const MachineOperand *Base; // Filled with the base operand of MI.
3360 int64_t Offset; // Filled with the offset of MI.
3361 bool OffsetIsScalable;
3362 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3363 return std::nullopt;
3364
3365 if (!Base->isReg())
3366 return std::nullopt;
3367 ExtAddrMode AM;
3368 AM.BaseReg = Base->getReg();
3369 AM.Displacement = Offset;
3370 AM.ScaledReg = 0;
3371 AM.Scale = 0;
3372 return AM;
3373}
3374
3375 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3376                                            Register Reg,
3377 const MachineInstr &AddrI,
3378 ExtAddrMode &AM) const {
3379 // Filter out instructions into which we cannot fold.
3380 unsigned NumBytes;
3381 int64_t OffsetScale = 1;
3382 switch (MemI.getOpcode()) {
3383 default:
3384 return false;
3385
3386 case AArch64::LDURQi:
3387 case AArch64::STURQi:
3388 NumBytes = 16;
3389 break;
3390
3391 case AArch64::LDURDi:
3392 case AArch64::STURDi:
3393 case AArch64::LDURXi:
3394 case AArch64::STURXi:
3395 NumBytes = 8;
3396 break;
3397
3398 case AArch64::LDURWi:
3399 case AArch64::LDURSWi:
3400 case AArch64::STURWi:
3401 NumBytes = 4;
3402 break;
3403
3404 case AArch64::LDURHi:
3405 case AArch64::STURHi:
3406 case AArch64::LDURHHi:
3407 case AArch64::STURHHi:
3408 case AArch64::LDURSHXi:
3409 case AArch64::LDURSHWi:
3410 NumBytes = 2;
3411 break;
3412
3413 case AArch64::LDRBroX:
3414 case AArch64::LDRBBroX:
3415 case AArch64::LDRSBXroX:
3416 case AArch64::LDRSBWroX:
3417 case AArch64::STRBroX:
3418 case AArch64::STRBBroX:
3419 case AArch64::LDURBi:
3420 case AArch64::LDURBBi:
3421 case AArch64::LDURSBXi:
3422 case AArch64::LDURSBWi:
3423 case AArch64::STURBi:
3424 case AArch64::STURBBi:
3425 case AArch64::LDRBui:
3426 case AArch64::LDRBBui:
3427 case AArch64::LDRSBXui:
3428 case AArch64::LDRSBWui:
3429 case AArch64::STRBui:
3430 case AArch64::STRBBui:
3431 NumBytes = 1;
3432 break;
3433
3434 case AArch64::LDRQroX:
3435 case AArch64::STRQroX:
3436 case AArch64::LDRQui:
3437 case AArch64::STRQui:
3438 NumBytes = 16;
3439 OffsetScale = 16;
3440 break;
3441
3442 case AArch64::LDRDroX:
3443 case AArch64::STRDroX:
3444 case AArch64::LDRXroX:
3445 case AArch64::STRXroX:
3446 case AArch64::LDRDui:
3447 case AArch64::STRDui:
3448 case AArch64::LDRXui:
3449 case AArch64::STRXui:
3450 NumBytes = 8;
3451 OffsetScale = 8;
3452 break;
3453
3454 case AArch64::LDRWroX:
3455 case AArch64::LDRSWroX:
3456 case AArch64::STRWroX:
3457 case AArch64::LDRWui:
3458 case AArch64::LDRSWui:
3459 case AArch64::STRWui:
3460 NumBytes = 4;
3461 OffsetScale = 4;
3462 break;
3463
3464 case AArch64::LDRHroX:
3465 case AArch64::STRHroX:
3466 case AArch64::LDRHHroX:
3467 case AArch64::STRHHroX:
3468 case AArch64::LDRSHXroX:
3469 case AArch64::LDRSHWroX:
3470 case AArch64::LDRHui:
3471 case AArch64::STRHui:
3472 case AArch64::LDRHHui:
3473 case AArch64::STRHHui:
3474 case AArch64::LDRSHXui:
3475 case AArch64::LDRSHWui:
3476 NumBytes = 2;
3477 OffsetScale = 2;
3478 break;
3479 }
3480
3481 // Check the fold operand is not the loaded/stored value.
3482 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3483 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3484 return false;
3485
3486 // Handle memory instructions with a [Reg, Reg] addressing mode.
3487 if (MemI.getOperand(2).isReg()) {
3488 // Bail if the addressing mode already includes extension of the offset
3489 // register.
3490 if (MemI.getOperand(3).getImm())
3491 return false;
3492
3493 // Check if we actually have a scaled offset.
3494 if (MemI.getOperand(4).getImm() == 0)
3495 OffsetScale = 1;
3496
3497     // If the address instruction is folded into the base register, then the
3498     // addressing mode must not have a scale, so that we can swap the base and
3499     // the scaled registers.
3500 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3501 return false;
3502
3503 switch (AddrI.getOpcode()) {
3504 default:
3505 return false;
3506
3507 case AArch64::SBFMXri:
3508 // sxtw Xa, Wm
3509 // ldr Xd, [Xn, Xa, lsl #N]
3510 // ->
3511 // ldr Xd, [Xn, Wm, sxtw #N]
3512 if (AddrI.getOperand(2).getImm() != 0 ||
3513 AddrI.getOperand(3).getImm() != 31)
3514 return false;
3515
3516 AM.BaseReg = MemI.getOperand(1).getReg();
3517 if (AM.BaseReg == Reg)
3518 AM.BaseReg = MemI.getOperand(2).getReg();
3519 AM.ScaledReg = AddrI.getOperand(1).getReg();
3520 AM.Scale = OffsetScale;
3521 AM.Displacement = 0;
3522       AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3523       return true;
3524
3525 case TargetOpcode::SUBREG_TO_REG: {
3526 // mov Wa, Wm
3527 // ldr Xd, [Xn, Xa, lsl #N]
3528 // ->
3529 // ldr Xd, [Xn, Wm, uxtw #N]
3530
3531 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3532 if (AddrI.getOperand(1).getImm() != 0 ||
3533 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3534 return false;
3535
3536 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3537 Register OffsetReg = AddrI.getOperand(2).getReg();
3538 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3539 return false;
3540
3541 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3542 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3543 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3544 DefMI.getOperand(3).getImm() != 0)
3545 return false;
3546
3547 AM.BaseReg = MemI.getOperand(1).getReg();
3548 if (AM.BaseReg == Reg)
3549 AM.BaseReg = MemI.getOperand(2).getReg();
3550 AM.ScaledReg = DefMI.getOperand(2).getReg();
3551 AM.Scale = OffsetScale;
3552 AM.Displacement = 0;
3553       AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3554       return true;
3555 }
3556 }
3557 }
3558
3559 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3560
3561 // Check we are not breaking a potential conversion to an LDP.
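  // Note: an LDP/STP of N-byte elements takes a 7-bit signed immediate scaled
  // by N, i.e. offsets in [-64*N, 63*N]; the ranges checked below are exactly
  // those bounds for 4-, 8- and 16-byte accesses.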
3562 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3563 int64_t NewOffset) -> bool {
3564 int64_t MinOffset, MaxOffset;
3565 switch (NumBytes) {
3566 default:
3567 return true;
3568 case 4:
3569 MinOffset = -256;
3570 MaxOffset = 252;
3571 break;
3572 case 8:
3573 MinOffset = -512;
3574 MaxOffset = 504;
3575 break;
3576 case 16:
3577 MinOffset = -1024;
3578 MaxOffset = 1008;
3579 break;
3580 }
3581 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3582 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3583 };
3584 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3585 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3586 int64_t NewOffset = OldOffset + Disp;
3587 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3588 return false;
3589 // If the old offset would fit into an LDP, but the new offset wouldn't,
3590 // bail out.
3591 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3592 return false;
3593 AM.BaseReg = AddrI.getOperand(1).getReg();
3594 AM.ScaledReg = 0;
3595 AM.Scale = 0;
3596 AM.Displacement = NewOffset;
3597     AM.Form = ExtAddrMode::Formula::Basic;
3598     return true;
3599 };
3600
3601 auto canFoldAddRegIntoAddrMode =
3602 [&](int64_t Scale,
3603           ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3604         if (MemI.getOperand(2).getImm() != 0)
3605 return false;
3606 if ((unsigned)Scale != Scale)
3607 return false;
3608 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3609 return false;
3610 AM.BaseReg = AddrI.getOperand(1).getReg();
3611 AM.ScaledReg = AddrI.getOperand(2).getReg();
3612 AM.Scale = Scale;
3613 AM.Displacement = 0;
3614 AM.Form = Form;
3615 return true;
3616 };
3617
3618 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3619 unsigned Opcode = MemI.getOpcode();
3620 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3621 Subtarget.isSTRQroSlow();
3622 };
3623
3624 int64_t Disp = 0;
3625 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3626 switch (AddrI.getOpcode()) {
3627 default:
3628 return false;
3629
3630 case AArch64::ADDXri:
3631 // add Xa, Xn, #N
3632 // ldr Xd, [Xa, #M]
3633 // ->
3634 // ldr Xd, [Xn, #N'+M]
3635 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3636 return canFoldAddSubImmIntoAddrMode(Disp);
3637
3638 case AArch64::SUBXri:
3639 // sub Xa, Xn, #N
3640 // ldr Xd, [Xa, #M]
3641 // ->
3642 // ldr Xd, [Xn, #N'+M]
3643 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3644 return canFoldAddSubImmIntoAddrMode(-Disp);
3645
3646 case AArch64::ADDXrs: {
3647 // add Xa, Xn, Xm, lsl #N
3648 // ldr Xd, [Xa]
3649 // ->
3650 // ldr Xd, [Xn, Xm, lsl #N]
3651
3652 // Don't fold the add if the result would be slower, unless optimising for
3653 // size.
3654 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3655 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3656 return false;
3657 Shift = AArch64_AM::getShiftValue(Shift);
3658 if (!OptSize) {
3659 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3660 return false;
3661 if (avoidSlowSTRQ(MemI))
3662 return false;
3663 }
3664 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3665 }
3666
3667 case AArch64::ADDXrr:
3668 // add Xa, Xn, Xm
3669 // ldr Xd, [Xa]
3670 // ->
3671 // ldr Xd, [Xn, Xm, lsl #0]
3672
3673 // Don't fold the add if the result would be slower, unless optimising for
3674 // size.
3675 if (!OptSize && avoidSlowSTRQ(MemI))
3676 return false;
3677 return canFoldAddRegIntoAddrMode(1);
3678
3679 case AArch64::ADDXrx:
3680 // add Xa, Xn, Wm, {s,u}xtw #N
3681 // ldr Xd, [Xa]
3682 // ->
3683 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3684
3685 // Don't fold the add if the result would be slower, unless optimising for
3686 // size.
3687 if (!OptSize && avoidSlowSTRQ(MemI))
3688 return false;
3689
3690 // Can fold only sign-/zero-extend of a word.
3691 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3692 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3693 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3694 return false;
3695
3696 return canFoldAddRegIntoAddrMode(
3697 1ULL << AArch64_AM::getArithShiftValue(Imm),
3698 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3699 : ExtAddrMode::Formula::ZExtScaledReg);
3700 }
3701}
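// At this point a successful fold has filled in AM with one of three shapes:
// a Basic form (base register plus either an immediate displacement or an
// LSL-scaled index register), or an SExtScaledReg/ZExtScaledReg form with a
// sign- or zero-extended 32-bit index. emitLdStWithAddr below consumes the
// same encoding when it rewrites the memory instruction.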
3702
3703// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3704// return the opcode of an instruction performing the same operation, but using
3705// the [Reg, Reg] addressing mode.
3706static unsigned regOffsetOpcode(unsigned Opcode) {
3707 switch (Opcode) {
3708 default:
3709 llvm_unreachable("Address folding not implemented for instruction");
3710
3711 case AArch64::LDURQi:
3712 case AArch64::LDRQui:
3713 return AArch64::LDRQroX;
3714 case AArch64::STURQi:
3715 case AArch64::STRQui:
3716 return AArch64::STRQroX;
3717 case AArch64::LDURDi:
3718 case AArch64::LDRDui:
3719 return AArch64::LDRDroX;
3720 case AArch64::STURDi:
3721 case AArch64::STRDui:
3722 return AArch64::STRDroX;
3723 case AArch64::LDURXi:
3724 case AArch64::LDRXui:
3725 return AArch64::LDRXroX;
3726 case AArch64::STURXi:
3727 case AArch64::STRXui:
3728 return AArch64::STRXroX;
3729 case AArch64::LDURWi:
3730 case AArch64::LDRWui:
3731 return AArch64::LDRWroX;
3732 case AArch64::LDURSWi:
3733 case AArch64::LDRSWui:
3734 return AArch64::LDRSWroX;
3735 case AArch64::STURWi:
3736 case AArch64::STRWui:
3737 return AArch64::STRWroX;
3738 case AArch64::LDURHi:
3739 case AArch64::LDRHui:
3740 return AArch64::LDRHroX;
3741 case AArch64::STURHi:
3742 case AArch64::STRHui:
3743 return AArch64::STRHroX;
3744 case AArch64::LDURHHi:
3745 case AArch64::LDRHHui:
3746 return AArch64::LDRHHroX;
3747 case AArch64::STURHHi:
3748 case AArch64::STRHHui:
3749 return AArch64::STRHHroX;
3750 case AArch64::LDURSHXi:
3751 case AArch64::LDRSHXui:
3752 return AArch64::LDRSHXroX;
3753 case AArch64::LDURSHWi:
3754 case AArch64::LDRSHWui:
3755 return AArch64::LDRSHWroX;
3756 case AArch64::LDURBi:
3757 case AArch64::LDRBui:
3758 return AArch64::LDRBroX;
3759 case AArch64::LDURBBi:
3760 case AArch64::LDRBBui:
3761 return AArch64::LDRBBroX;
3762 case AArch64::LDURSBXi:
3763 case AArch64::LDRSBXui:
3764 return AArch64::LDRSBXroX;
3765 case AArch64::LDURSBWi:
3766 case AArch64::LDRSBWui:
3767 return AArch64::LDRSBWroX;
3768 case AArch64::STURBi:
3769 case AArch64::STRBui:
3770 return AArch64::STRBroX;
3771 case AArch64::STURBBi:
3772 case AArch64::STRBBui:
3773 return AArch64::STRBBroX;
3774 }
3775}
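// For example, LDRXui (ldr Xt, [Xn, #imm]) maps to LDRXroX
// (ldr Xt, [Xn, Xm, lsl #shift]); loads and stores of the other widths follow
// the same pattern.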
3776
3777// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3778// the opcode of an instruction performing the same operation, but using the
3779// [Reg, #Imm] addressing mode with scaled offset.
3780unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3781 switch (Opcode) {
3782 default:
3783 llvm_unreachable("Address folding not implemented for instruction");
3784
3785 case AArch64::LDURQi:
3786 Scale = 16;
3787 return AArch64::LDRQui;
3788 case AArch64::STURQi:
3789 Scale = 16;
3790 return AArch64::STRQui;
3791 case AArch64::LDURDi:
3792 Scale = 8;
3793 return AArch64::LDRDui;
3794 case AArch64::STURDi:
3795 Scale = 8;
3796 return AArch64::STRDui;
3797 case AArch64::LDURXi:
3798 Scale = 8;
3799 return AArch64::LDRXui;
3800 case AArch64::STURXi:
3801 Scale = 8;
3802 return AArch64::STRXui;
3803 case AArch64::LDURWi:
3804 Scale = 4;
3805 return AArch64::LDRWui;
3806 case AArch64::LDURSWi:
3807 Scale = 4;
3808 return AArch64::LDRSWui;
3809 case AArch64::STURWi:
3810 Scale = 4;
3811 return AArch64::STRWui;
3812 case AArch64::LDURHi:
3813 Scale = 2;
3814 return AArch64::LDRHui;
3815 case AArch64::STURHi:
3816 Scale = 2;
3817 return AArch64::STRHui;
3818 case AArch64::LDURHHi:
3819 Scale = 2;
3820 return AArch64::LDRHHui;
3821 case AArch64::STURHHi:
3822 Scale = 2;
3823 return AArch64::STRHHui;
3824 case AArch64::LDURSHXi:
3825 Scale = 2;
3826 return AArch64::LDRSHXui;
3827 case AArch64::LDURSHWi:
3828 Scale = 2;
3829 return AArch64::LDRSHWui;
3830 case AArch64::LDURBi:
3831 Scale = 1;
3832 return AArch64::LDRBui;
3833 case AArch64::LDURBBi:
3834 Scale = 1;
3835 return AArch64::LDRBBui;
3836 case AArch64::LDURSBXi:
3837 Scale = 1;
3838 return AArch64::LDRSBXui;
3839 case AArch64::LDURSBWi:
3840 Scale = 1;
3841 return AArch64::LDRSBWui;
3842 case AArch64::STURBi:
3843 Scale = 1;
3844 return AArch64::STRBui;
3845 case AArch64::STURBBi:
3846 Scale = 1;
3847 return AArch64::STRBBui;
3848 case AArch64::LDRQui:
3849 case AArch64::STRQui:
3850 Scale = 16;
3851 return Opcode;
3852 case AArch64::LDRDui:
3853 case AArch64::STRDui:
3854 case AArch64::LDRXui:
3855 case AArch64::STRXui:
3856 Scale = 8;
3857 return Opcode;
3858 case AArch64::LDRWui:
3859 case AArch64::LDRSWui:
3860 case AArch64::STRWui:
3861 Scale = 4;
3862 return Opcode;
3863 case AArch64::LDRHui:
3864 case AArch64::STRHui:
3865 case AArch64::LDRHHui:
3866 case AArch64::STRHHui:
3867 case AArch64::LDRSHXui:
3868 case AArch64::LDRSHWui:
3869 Scale = 2;
3870 return Opcode;
3871 case AArch64::LDRBui:
3872 case AArch64::LDRBBui:
3873 case AArch64::LDRSBXui:
3874 case AArch64::LDRSBWui:
3875 case AArch64::STRBui:
3876 case AArch64::STRBBui:
3877 Scale = 1;
3878 return Opcode;
3879 }
3880}
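// For example, STURXi maps to STRXui with Scale = 8, so a byte displacement of
// 32 is later emitted as the scaled immediate 32 / 8 = 4 (see
// emitLdStWithAddr).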
3881
3882// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3883// the opcode of an instruction performing the same operation, but using the
3884// [Reg, #Imm] addressing mode with unscaled offset.
3885unsigned unscaledOffsetOpcode(unsigned Opcode) {
3886 switch (Opcode) {
3887 default:
3888 llvm_unreachable("Address folding not implemented for instruction");
3889
3890 case AArch64::LDURQi:
3891 case AArch64::STURQi:
3892 case AArch64::LDURDi:
3893 case AArch64::STURDi:
3894 case AArch64::LDURXi:
3895 case AArch64::STURXi:
3896 case AArch64::LDURWi:
3897 case AArch64::LDURSWi:
3898 case AArch64::STURWi:
3899 case AArch64::LDURHi:
3900 case AArch64::STURHi:
3901 case AArch64::LDURHHi:
3902 case AArch64::STURHHi:
3903 case AArch64::LDURSHXi:
3904 case AArch64::LDURSHWi:
3905 case AArch64::LDURBi:
3906 case AArch64::STURBi:
3907 case AArch64::LDURBBi:
3908 case AArch64::STURBBi:
3909 case AArch64::LDURSBWi:
3910 case AArch64::LDURSBXi:
3911 return Opcode;
3912 case AArch64::LDRQui:
3913 return AArch64::LDURQi;
3914 case AArch64::STRQui:
3915 return AArch64::STURQi;
3916 case AArch64::LDRDui:
3917 return AArch64::LDURDi;
3918 case AArch64::STRDui:
3919 return AArch64::STURDi;
3920 case AArch64::LDRXui:
3921 return AArch64::LDURXi;
3922 case AArch64::STRXui:
3923 return AArch64::STURXi;
3924 case AArch64::LDRWui:
3925 return AArch64::LDURWi;
3926 case AArch64::LDRSWui:
3927 return AArch64::LDURSWi;
3928 case AArch64::STRWui:
3929 return AArch64::STURWi;
3930 case AArch64::LDRHui:
3931 return AArch64::LDURHi;
3932 case AArch64::STRHui:
3933 return AArch64::STURHi;
3934 case AArch64::LDRHHui:
3935 return AArch64::LDURHHi;
3936 case AArch64::STRHHui:
3937 return AArch64::STURHHi;
3938 case AArch64::LDRSHXui:
3939 return AArch64::LDURSHXi;
3940 case AArch64::LDRSHWui:
3941 return AArch64::LDURSHWi;
3942 case AArch64::LDRBBui:
3943 return AArch64::LDURBBi;
3944 case AArch64::LDRBui:
3945 return AArch64::LDURBi;
3946 case AArch64::STRBBui:
3947 return AArch64::STURBBi;
3948 case AArch64::STRBui:
3949 return AArch64::STURBi;
3950 case AArch64::LDRSBWui:
3951 return AArch64::LDURSBWi;
3952 case AArch64::LDRSBXui:
3953 return AArch64::LDURSBXi;
3954 }
3955}
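// For example, LDRQui maps back to LDURQi; emitLdStWithAddr prefers this form
// whenever the displacement fits the signed 9-bit unscaled range [-256, 255].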
3956
3957// Given the opcode of a memory load/store instruction, return the opcode of an
3958// instruction performing the same operation, but using
3959// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3960// offset register.
3961static unsigned offsetExtendOpcode(unsigned Opcode) {
3962 switch (Opcode) {
3963 default:
3964 llvm_unreachable("Address folding not implemented for instruction");
3965
3966 case AArch64::LDRQroX:
3967 case AArch64::LDURQi:
3968 case AArch64::LDRQui:
3969 return AArch64::LDRQroW;
3970 case AArch64::STRQroX:
3971 case AArch64::STURQi:
3972 case AArch64::STRQui:
3973 return AArch64::STRQroW;
3974 case AArch64::LDRDroX:
3975 case AArch64::LDURDi:
3976 case AArch64::LDRDui:
3977 return AArch64::LDRDroW;
3978 case AArch64::STRDroX:
3979 case AArch64::STURDi:
3980 case AArch64::STRDui:
3981 return AArch64::STRDroW;
3982 case AArch64::LDRXroX:
3983 case AArch64::LDURXi:
3984 case AArch64::LDRXui:
3985 return AArch64::LDRXroW;
3986 case AArch64::STRXroX:
3987 case AArch64::STURXi:
3988 case AArch64::STRXui:
3989 return AArch64::STRXroW;
3990 case AArch64::LDRWroX:
3991 case AArch64::LDURWi:
3992 case AArch64::LDRWui:
3993 return AArch64::LDRWroW;
3994 case AArch64::LDRSWroX:
3995 case AArch64::LDURSWi:
3996 case AArch64::LDRSWui:
3997 return AArch64::LDRSWroW;
3998 case AArch64::STRWroX:
3999 case AArch64::STURWi:
4000 case AArch64::STRWui:
4001 return AArch64::STRWroW;
4002 case AArch64::LDRHroX:
4003 case AArch64::LDURHi:
4004 case AArch64::LDRHui:
4005 return AArch64::LDRHroW;
4006 case AArch64::STRHroX:
4007 case AArch64::STURHi:
4008 case AArch64::STRHui:
4009 return AArch64::STRHroW;
4010 case AArch64::LDRHHroX:
4011 case AArch64::LDURHHi:
4012 case AArch64::LDRHHui:
4013 return AArch64::LDRHHroW;
4014 case AArch64::STRHHroX:
4015 case AArch64::STURHHi:
4016 case AArch64::STRHHui:
4017 return AArch64::STRHHroW;
4018 case AArch64::LDRSHXroX:
4019 case AArch64::LDURSHXi:
4020 case AArch64::LDRSHXui:
4021 return AArch64::LDRSHXroW;
4022 case AArch64::LDRSHWroX:
4023 case AArch64::LDURSHWi:
4024 case AArch64::LDRSHWui:
4025 return AArch64::LDRSHWroW;
4026 case AArch64::LDRBroX:
4027 case AArch64::LDURBi:
4028 case AArch64::LDRBui:
4029 return AArch64::LDRBroW;
4030 case AArch64::LDRBBroX:
4031 case AArch64::LDURBBi:
4032 case AArch64::LDRBBui:
4033 return AArch64::LDRBBroW;
4034 case AArch64::LDRSBXroX:
4035 case AArch64::LDURSBXi:
4036 case AArch64::LDRSBXui:
4037 return AArch64::LDRSBXroW;
4038 case AArch64::LDRSBWroX:
4039 case AArch64::LDURSBWi:
4040 case AArch64::LDRSBWui:
4041 return AArch64::LDRSBWroW;
4042 case AArch64::STRBroX:
4043 case AArch64::STURBi:
4044 case AArch64::STRBui:
4045 return AArch64::STRBroW;
4046 case AArch64::STRBBroX:
4047 case AArch64::STURBBi:
4048 case AArch64::STRBBui:
4049 return AArch64::STRBBroW;
4050 }
4051}
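// For example, LDRWui, LDURWi and LDRWroX all map to LDRWroW, i.e.
// ldr Wt, [Xn, Wm, {s,u}xtw #N].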
4052
4053 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
4054 const ExtAddrMode &AM) const {
4055
4056 const DebugLoc &DL = MemI.getDebugLoc();
4057 MachineBasicBlock &MBB = *MemI.getParent();
4058 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
4059
4060 if (AM.Form == ExtAddrMode::Formula::Basic) {
4061 if (AM.ScaledReg) {
4062 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4063 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4064 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4065 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4066 .addReg(MemI.getOperand(0).getReg(),
4067 MemI.mayLoad() ? RegState::Define : 0)
4068 .addReg(AM.BaseReg)
4069 .addReg(AM.ScaledReg)
4070 .addImm(0)
4071 .addImm(AM.Scale > 1)
4072 .setMemRefs(MemI.memoperands())
4073 .setMIFlags(MemI.getFlags());
4074 return B.getInstr();
4075 }
4076
4077 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4078 "Addressing mode not supported for folding");
4079
4080 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
4081 unsigned Scale = 1;
4082 unsigned Opcode = MemI.getOpcode();
4083 if (isInt<9>(AM.Displacement))
4084 Opcode = unscaledOffsetOpcode(Opcode);
4085 else
4086 Opcode = scaledOffsetOpcode(Opcode, Scale);
4087
4088 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4089 .addReg(MemI.getOperand(0).getReg(),
4090 MemI.mayLoad() ? RegState::Define : 0)
4091 .addReg(AM.BaseReg)
4092 .addImm(AM.Displacement / Scale)
4093 .setMemRefs(MemI.memoperands())
4094 .setMIFlags(MemI.getFlags());
4095 return B.getInstr();
4096 }
4097
4098 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
4099 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
4100 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4101 assert(AM.ScaledReg && !AM.Displacement &&
4102 "Address offset can be a register or an immediate, but not both");
4103 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4104 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4105 // Make sure the offset register is in the correct register class.
4106 Register OffsetReg = AM.ScaledReg;
4107 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4108 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4109 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4110 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4111 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
4112 }
4113 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4114 .addReg(MemI.getOperand(0).getReg(),
4115 MemI.mayLoad() ? RegState::Define : 0)
4116 .addReg(AM.BaseReg)
4117 .addReg(OffsetReg)
4118 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
4119 .addImm(AM.Scale != 1)
4120 .setMemRefs(MemI.memoperands())
4121 .setMIFlags(MemI.getFlags());
4122
4123 return B.getInstr();
4124 }
4125
4126 llvm_unreachable(
4127 "Function must not be called with an addressing mode it can't handle");
4128}
4129
4130/// Return true if the opcode is a post-index ld/st instruction, which really
4131/// loads from base+0.
4132static bool isPostIndexLdStOpcode(unsigned Opcode) {
4133 switch (Opcode) {
4134 default:
4135 return false;
4136 case AArch64::LD1Fourv16b_POST:
4137 case AArch64::LD1Fourv1d_POST:
4138 case AArch64::LD1Fourv2d_POST:
4139 case AArch64::LD1Fourv2s_POST:
4140 case AArch64::LD1Fourv4h_POST:
4141 case AArch64::LD1Fourv4s_POST:
4142 case AArch64::LD1Fourv8b_POST:
4143 case AArch64::LD1Fourv8h_POST:
4144 case AArch64::LD1Onev16b_POST:
4145 case AArch64::LD1Onev1d_POST:
4146 case AArch64::LD1Onev2d_POST:
4147 case AArch64::LD1Onev2s_POST:
4148 case AArch64::LD1Onev4h_POST:
4149 case AArch64::LD1Onev4s_POST:
4150 case AArch64::LD1Onev8b_POST:
4151 case AArch64::LD1Onev8h_POST:
4152 case AArch64::LD1Rv16b_POST:
4153 case AArch64::LD1Rv1d_POST:
4154 case AArch64::LD1Rv2d_POST:
4155 case AArch64::LD1Rv2s_POST:
4156 case AArch64::LD1Rv4h_POST:
4157 case AArch64::LD1Rv4s_POST:
4158 case AArch64::LD1Rv8b_POST:
4159 case AArch64::LD1Rv8h_POST:
4160 case AArch64::LD1Threev16b_POST:
4161 case AArch64::LD1Threev1d_POST:
4162 case AArch64::LD1Threev2d_POST:
4163 case AArch64::LD1Threev2s_POST:
4164 case AArch64::LD1Threev4h_POST:
4165 case AArch64::LD1Threev4s_POST:
4166 case AArch64::LD1Threev8b_POST:
4167 case AArch64::LD1Threev8h_POST:
4168 case AArch64::LD1Twov16b_POST:
4169 case AArch64::LD1Twov1d_POST:
4170 case AArch64::LD1Twov2d_POST:
4171 case AArch64::LD1Twov2s_POST:
4172 case AArch64::LD1Twov4h_POST:
4173 case AArch64::LD1Twov4s_POST:
4174 case AArch64::LD1Twov8b_POST:
4175 case AArch64::LD1Twov8h_POST:
4176 case AArch64::LD1i16_POST:
4177 case AArch64::LD1i32_POST:
4178 case AArch64::LD1i64_POST:
4179 case AArch64::LD1i8_POST:
4180 case AArch64::LD2Rv16b_POST:
4181 case AArch64::LD2Rv1d_POST:
4182 case AArch64::LD2Rv2d_POST:
4183 case AArch64::LD2Rv2s_POST:
4184 case AArch64::LD2Rv4h_POST:
4185 case AArch64::LD2Rv4s_POST:
4186 case AArch64::LD2Rv8b_POST:
4187 case AArch64::LD2Rv8h_POST:
4188 case AArch64::LD2Twov16b_POST:
4189 case AArch64::LD2Twov2d_POST:
4190 case AArch64::LD2Twov2s_POST:
4191 case AArch64::LD2Twov4h_POST:
4192 case AArch64::LD2Twov4s_POST:
4193 case AArch64::LD2Twov8b_POST:
4194 case AArch64::LD2Twov8h_POST:
4195 case AArch64::LD2i16_POST:
4196 case AArch64::LD2i32_POST:
4197 case AArch64::LD2i64_POST:
4198 case AArch64::LD2i8_POST:
4199 case AArch64::LD3Rv16b_POST:
4200 case AArch64::LD3Rv1d_POST:
4201 case AArch64::LD3Rv2d_POST:
4202 case AArch64::LD3Rv2s_POST:
4203 case AArch64::LD3Rv4h_POST:
4204 case AArch64::LD3Rv4s_POST:
4205 case AArch64::LD3Rv8b_POST:
4206 case AArch64::LD3Rv8h_POST:
4207 case AArch64::LD3Threev16b_POST:
4208 case AArch64::LD3Threev2d_POST:
4209 case AArch64::LD3Threev2s_POST:
4210 case AArch64::LD3Threev4h_POST:
4211 case AArch64::LD3Threev4s_POST:
4212 case AArch64::LD3Threev8b_POST:
4213 case AArch64::LD3Threev8h_POST:
4214 case AArch64::LD3i16_POST:
4215 case AArch64::LD3i32_POST:
4216 case AArch64::LD3i64_POST:
4217 case AArch64::LD3i8_POST:
4218 case AArch64::LD4Fourv16b_POST:
4219 case AArch64::LD4Fourv2d_POST:
4220 case AArch64::LD4Fourv2s_POST:
4221 case AArch64::LD4Fourv4h_POST:
4222 case AArch64::LD4Fourv4s_POST:
4223 case AArch64::LD4Fourv8b_POST:
4224 case AArch64::LD4Fourv8h_POST:
4225 case AArch64::LD4Rv16b_POST:
4226 case AArch64::LD4Rv1d_POST:
4227 case AArch64::LD4Rv2d_POST:
4228 case AArch64::LD4Rv2s_POST:
4229 case AArch64::LD4Rv4h_POST:
4230 case AArch64::LD4Rv4s_POST:
4231 case AArch64::LD4Rv8b_POST:
4232 case AArch64::LD4Rv8h_POST:
4233 case AArch64::LD4i16_POST:
4234 case AArch64::LD4i32_POST:
4235 case AArch64::LD4i64_POST:
4236 case AArch64::LD4i8_POST:
4237 case AArch64::LDAPRWpost:
4238 case AArch64::LDAPRXpost:
4239 case AArch64::LDIAPPWpost:
4240 case AArch64::LDIAPPXpost:
4241 case AArch64::LDPDpost:
4242 case AArch64::LDPQpost:
4243 case AArch64::LDPSWpost:
4244 case AArch64::LDPSpost:
4245 case AArch64::LDPWpost:
4246 case AArch64::LDPXpost:
4247 case AArch64::LDRBBpost:
4248 case AArch64::LDRBpost:
4249 case AArch64::LDRDpost:
4250 case AArch64::LDRHHpost:
4251 case AArch64::LDRHpost:
4252 case AArch64::LDRQpost:
4253 case AArch64::LDRSBWpost:
4254 case AArch64::LDRSBXpost:
4255 case AArch64::LDRSHWpost:
4256 case AArch64::LDRSHXpost:
4257 case AArch64::LDRSWpost:
4258 case AArch64::LDRSpost:
4259 case AArch64::LDRWpost:
4260 case AArch64::LDRXpost:
4261 case AArch64::ST1Fourv16b_POST:
4262 case AArch64::ST1Fourv1d_POST:
4263 case AArch64::ST1Fourv2d_POST:
4264 case AArch64::ST1Fourv2s_POST:
4265 case AArch64::ST1Fourv4h_POST:
4266 case AArch64::ST1Fourv4s_POST:
4267 case AArch64::ST1Fourv8b_POST:
4268 case AArch64::ST1Fourv8h_POST:
4269 case AArch64::ST1Onev16b_POST:
4270 case AArch64::ST1Onev1d_POST:
4271 case AArch64::ST1Onev2d_POST:
4272 case AArch64::ST1Onev2s_POST:
4273 case AArch64::ST1Onev4h_POST:
4274 case AArch64::ST1Onev4s_POST:
4275 case AArch64::ST1Onev8b_POST:
4276 case AArch64::ST1Onev8h_POST:
4277 case AArch64::ST1Threev16b_POST:
4278 case AArch64::ST1Threev1d_POST:
4279 case AArch64::ST1Threev2d_POST:
4280 case AArch64::ST1Threev2s_POST:
4281 case AArch64::ST1Threev4h_POST:
4282 case AArch64::ST1Threev4s_POST:
4283 case AArch64::ST1Threev8b_POST:
4284 case AArch64::ST1Threev8h_POST:
4285 case AArch64::ST1Twov16b_POST:
4286 case AArch64::ST1Twov1d_POST:
4287 case AArch64::ST1Twov2d_POST:
4288 case AArch64::ST1Twov2s_POST:
4289 case AArch64::ST1Twov4h_POST:
4290 case AArch64::ST1Twov4s_POST:
4291 case AArch64::ST1Twov8b_POST:
4292 case AArch64::ST1Twov8h_POST:
4293 case AArch64::ST1i16_POST:
4294 case AArch64::ST1i32_POST:
4295 case AArch64::ST1i64_POST:
4296 case AArch64::ST1i8_POST:
4297 case AArch64::ST2GPostIndex:
4298 case AArch64::ST2Twov16b_POST:
4299 case AArch64::ST2Twov2d_POST:
4300 case AArch64::ST2Twov2s_POST:
4301 case AArch64::ST2Twov4h_POST:
4302 case AArch64::ST2Twov4s_POST:
4303 case AArch64::ST2Twov8b_POST:
4304 case AArch64::ST2Twov8h_POST:
4305 case AArch64::ST2i16_POST:
4306 case AArch64::ST2i32_POST:
4307 case AArch64::ST2i64_POST:
4308 case AArch64::ST2i8_POST:
4309 case AArch64::ST3Threev16b_POST:
4310 case AArch64::ST3Threev2d_POST:
4311 case AArch64::ST3Threev2s_POST:
4312 case AArch64::ST3Threev4h_POST:
4313 case AArch64::ST3Threev4s_POST:
4314 case AArch64::ST3Threev8b_POST:
4315 case AArch64::ST3Threev8h_POST:
4316 case AArch64::ST3i16_POST:
4317 case AArch64::ST3i32_POST:
4318 case AArch64::ST3i64_POST:
4319 case AArch64::ST3i8_POST:
4320 case AArch64::ST4Fourv16b_POST:
4321 case AArch64::ST4Fourv2d_POST:
4322 case AArch64::ST4Fourv2s_POST:
4323 case AArch64::ST4Fourv4h_POST:
4324 case AArch64::ST4Fourv4s_POST:
4325 case AArch64::ST4Fourv8b_POST:
4326 case AArch64::ST4Fourv8h_POST:
4327 case AArch64::ST4i16_POST:
4328 case AArch64::ST4i32_POST:
4329 case AArch64::ST4i64_POST:
4330 case AArch64::ST4i8_POST:
4331 case AArch64::STGPostIndex:
4332 case AArch64::STGPpost:
4333 case AArch64::STPDpost:
4334 case AArch64::STPQpost:
4335 case AArch64::STPSpost:
4336 case AArch64::STPWpost:
4337 case AArch64::STPXpost:
4338 case AArch64::STRBBpost:
4339 case AArch64::STRBpost:
4340 case AArch64::STRDpost:
4341 case AArch64::STRHHpost:
4342 case AArch64::STRHpost:
4343 case AArch64::STRQpost:
4344 case AArch64::STRSpost:
4345 case AArch64::STRWpost:
4346 case AArch64::STRXpost:
4347 case AArch64::STZ2GPostIndex:
4348 case AArch64::STZGPostIndex:
4349 return true;
4350 }
4351}
4352
4353 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4354 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4355 bool &OffsetIsScalable, TypeSize &Width,
4356 const TargetRegisterInfo *TRI) const {
4357 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4358 // Handle only loads/stores with base register followed by immediate offset.
4359 if (LdSt.getNumExplicitOperands() == 3) {
4360 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4361 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4362 !LdSt.getOperand(2).isImm())
4363 return false;
4364 } else if (LdSt.getNumExplicitOperands() == 4) {
4365 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4366 if (!LdSt.getOperand(1).isReg() ||
4367 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4368 !LdSt.getOperand(3).isImm())
4369 return false;
4370 } else
4371 return false;
4372
4373 // Get the scaling factor for the instruction and set the width for the
4374 // instruction.
4375 TypeSize Scale(0U, false);
4376 int64_t Dummy1, Dummy2;
4377
4378 // If this returns false, then it's an instruction we don't want to handle.
4379 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4380 return false;
4381
4382 // Compute the offset. The offset is the immediate operand multiplied by the
4383 // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
4384 // instructions are a special case and always have an offset of 0.
4385 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4386 BaseOp = &LdSt.getOperand(2);
4387 Offset = 0;
4388 } else if (LdSt.getNumExplicitOperands() == 3) {
4389 BaseOp = &LdSt.getOperand(1);
4390 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4391 } else {
4392 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4393 BaseOp = &LdSt.getOperand(2);
4394 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4395 }
4396 OffsetIsScalable = Scale.isScalable();
4397
4398 return BaseOp->isReg() || BaseOp->isFI();
4399}
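// For example, for `ldr x1, [x0, #8]` (LDRXui with immediate operand 1) the
// scale and width are both 8, so BaseOp is x0 and Offset is 1 * 8 = 8, with
// OffsetIsScalable = false.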
4400
4403 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4404 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4405 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4406 return OfsOp;
4407}
4408
4409bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4410 TypeSize &Width, int64_t &MinOffset,
4411 int64_t &MaxOffset) {
4412 switch (Opcode) {
4413 // Not a memory operation or something we want to handle.
4414 default:
4415 Scale = TypeSize::getFixed(0);
4416 Width = TypeSize::getFixed(0);
4417 MinOffset = MaxOffset = 0;
4418 return false;
4419 // LDR / STR
4420 case AArch64::LDRQui:
4421 case AArch64::STRQui:
4422 Scale = TypeSize::getFixed(16);
4423 Width = TypeSize::getFixed(16);
4424 MinOffset = 0;
4425 MaxOffset = 4095;
4426 break;
4427 case AArch64::LDRXui:
4428 case AArch64::LDRDui:
4429 case AArch64::STRXui:
4430 case AArch64::STRDui:
4431 case AArch64::PRFMui:
4432 Scale = TypeSize::getFixed(8);
4433 Width = TypeSize::getFixed(8);
4434 MinOffset = 0;
4435 MaxOffset = 4095;
4436 break;
4437 case AArch64::LDRWui:
4438 case AArch64::LDRSui:
4439 case AArch64::LDRSWui:
4440 case AArch64::STRWui:
4441 case AArch64::STRSui:
4442 Scale = TypeSize::getFixed(4);
4443 Width = TypeSize::getFixed(4);
4444 MinOffset = 0;
4445 MaxOffset = 4095;
4446 break;
4447 case AArch64::LDRHui:
4448 case AArch64::LDRHHui:
4449 case AArch64::LDRSHWui:
4450 case AArch64::LDRSHXui:
4451 case AArch64::STRHui:
4452 case AArch64::STRHHui:
4453 Scale = TypeSize::getFixed(2);
4454 Width = TypeSize::getFixed(2);
4455 MinOffset = 0;
4456 MaxOffset = 4095;
4457 break;
4458 case AArch64::LDRBui:
4459 case AArch64::LDRBBui:
4460 case AArch64::LDRSBWui:
4461 case AArch64::LDRSBXui:
4462 case AArch64::STRBui:
4463 case AArch64::STRBBui:
4464 Scale = TypeSize::getFixed(1);
4465 Width = TypeSize::getFixed(1);
4466 MinOffset = 0;
4467 MaxOffset = 4095;
4468 break;
4469 // post/pre inc
4470 case AArch64::STRQpre:
4471 case AArch64::LDRQpost:
4472 Scale = TypeSize::getFixed(1);
4473 Width = TypeSize::getFixed(16);
4474 MinOffset = -256;
4475 MaxOffset = 255;
4476 break;
4477 case AArch64::LDRDpost:
4478 case AArch64::LDRDpre:
4479 case AArch64::LDRXpost:
4480 case AArch64::LDRXpre:
4481 case AArch64::STRDpost:
4482 case AArch64::STRDpre:
4483 case AArch64::STRXpost:
4484 case AArch64::STRXpre:
4485 Scale = TypeSize::getFixed(1);
4486 Width = TypeSize::getFixed(8);
4487 MinOffset = -256;
4488 MaxOffset = 255;
4489 break;
4490 case AArch64::STRWpost:
4491 case AArch64::STRWpre:
4492 case AArch64::LDRWpost:
4493 case AArch64::LDRWpre:
4494 case AArch64::STRSpost:
4495 case AArch64::STRSpre:
4496 case AArch64::LDRSpost:
4497 case AArch64::LDRSpre:
4498 Scale = TypeSize::getFixed(1);
4499 Width = TypeSize::getFixed(4);
4500 MinOffset = -256;
4501 MaxOffset = 255;
4502 break;
4503 case AArch64::LDRHpost:
4504 case AArch64::LDRHpre:
4505 case AArch64::STRHpost:
4506 case AArch64::STRHpre:
4507 case AArch64::LDRHHpost:
4508 case AArch64::LDRHHpre:
4509 case AArch64::STRHHpost:
4510 case AArch64::STRHHpre:
4511 Scale = TypeSize::getFixed(1);
4512 Width = TypeSize::getFixed(2);
4513 MinOffset = -256;
4514 MaxOffset = 255;
4515 break;
4516 case AArch64::LDRBpost:
4517 case AArch64::LDRBpre:
4518 case AArch64::STRBpost:
4519 case AArch64::STRBpre:
4520 case AArch64::LDRBBpost:
4521 case AArch64::LDRBBpre:
4522 case AArch64::STRBBpost:
4523 case AArch64::STRBBpre:
4524 Scale = TypeSize::getFixed(1);
4525 Width = TypeSize::getFixed(1);
4526 MinOffset = -256;
4527 MaxOffset = 255;
4528 break;
4529 // Unscaled
4530 case AArch64::LDURQi:
4531 case AArch64::STURQi:
4532 Scale = TypeSize::getFixed(1);
4533 Width = TypeSize::getFixed(16);
4534 MinOffset = -256;
4535 MaxOffset = 255;
4536 break;
4537 case AArch64::LDURXi:
4538 case AArch64::LDURDi:
4539 case AArch64::LDAPURXi:
4540 case AArch64::STURXi:
4541 case AArch64::STURDi:
4542 case AArch64::STLURXi:
4543 case AArch64::PRFUMi:
4544 Scale = TypeSize::getFixed(1);
4545 Width = TypeSize::getFixed(8);
4546 MinOffset = -256;
4547 MaxOffset = 255;
4548 break;
4549 case AArch64::LDURWi:
4550 case AArch64::LDURSi:
4551 case AArch64::LDURSWi:
4552 case AArch64::LDAPURi:
4553 case AArch64::LDAPURSWi:
4554 case AArch64::STURWi:
4555 case AArch64::STURSi:
4556 case AArch64::STLURWi:
4557 Scale = TypeSize::getFixed(1);
4558 Width = TypeSize::getFixed(4);
4559 MinOffset = -256;
4560 MaxOffset = 255;
4561 break;
4562 case AArch64::LDURHi:
4563 case AArch64::LDURHHi:
4564 case AArch64::LDURSHXi:
4565 case AArch64::LDURSHWi:
4566 case AArch64::LDAPURHi:
4567 case AArch64::LDAPURSHWi:
4568 case AArch64::LDAPURSHXi:
4569 case AArch64::STURHi:
4570 case AArch64::STURHHi:
4571 case AArch64::STLURHi:
4572 Scale = TypeSize::getFixed(1);
4573 Width = TypeSize::getFixed(2);
4574 MinOffset = -256;
4575 MaxOffset = 255;
4576 break;
4577 case AArch64::LDURBi:
4578 case AArch64::LDURBBi:
4579 case AArch64::LDURSBXi:
4580 case AArch64::LDURSBWi:
4581 case AArch64::LDAPURBi:
4582 case AArch64::LDAPURSBWi:
4583 case AArch64::LDAPURSBXi:
4584 case AArch64::STURBi:
4585 case AArch64::STURBBi:
4586 case AArch64::STLURBi:
4587 Scale = TypeSize::getFixed(1);
4588 Width = TypeSize::getFixed(1);
4589 MinOffset = -256;
4590 MaxOffset = 255;
4591 break;
4592 // LDP / STP (including pre/post inc)
4593 case AArch64::LDPQi:
4594 case AArch64::LDNPQi:
4595 case AArch64::STPQi:
4596 case AArch64::STNPQi:
4597 case AArch64::LDPQpost:
4598 case AArch64::LDPQpre:
4599 case AArch64::STPQpost:
4600 case AArch64::STPQpre:
4601 Scale = TypeSize::getFixed(16);
4602 Width = TypeSize::getFixed(16 * 2);
4603 MinOffset = -64;
4604 MaxOffset = 63;
4605 break;
4606 case AArch64::LDPXi:
4607 case AArch64::LDPDi:
4608 case AArch64::LDNPXi:
4609 case AArch64::LDNPDi:
4610 case AArch64::STPXi:
4611 case AArch64::STPDi:
4612 case AArch64::STNPXi:
4613 case AArch64::STNPDi:
4614 case AArch64::LDPDpost:
4615 case AArch64::LDPDpre:
4616 case AArch64::LDPXpost:
4617 case AArch64::LDPXpre:
4618 case AArch64::STPDpost:
4619 case AArch64::STPDpre:
4620 case AArch64::STPXpost:
4621 case AArch64::STPXpre:
4622 Scale = TypeSize::getFixed(8);
4623 Width = TypeSize::getFixed(8 * 2);
4624 MinOffset = -64;
4625 MaxOffset = 63;
4626 break;
4627 case AArch64::LDPWi:
4628 case AArch64::LDPSi:
4629 case AArch64::LDNPWi:
4630 case AArch64::LDNPSi:
4631 case AArch64::STPWi:
4632 case AArch64::STPSi:
4633 case AArch64::STNPWi:
4634 case AArch64::STNPSi:
4635 case AArch64::LDPSpost:
4636 case AArch64::LDPSpre:
4637 case AArch64::LDPWpost:
4638 case AArch64::LDPWpre:
4639 case AArch64::STPSpost:
4640 case AArch64::STPSpre:
4641 case AArch64::STPWpost:
4642 case AArch64::STPWpre:
4643 Scale = TypeSize::getFixed(4);
4644 Width = TypeSize::getFixed(4 * 2);
4645 MinOffset = -64;
4646 MaxOffset = 63;
4647 break;
4648 case AArch64::StoreSwiftAsyncContext:
4649 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4650 Scale = TypeSize::getFixed(1);
4651 Width = TypeSize::getFixed(8);
4652 MinOffset = 0;
4653 MaxOffset = 4095;
4654 break;
4655 case AArch64::ADDG:
4656 Scale = TypeSize::getFixed(16);
4657 Width = TypeSize::getFixed(0);
4658 MinOffset = 0;
4659 MaxOffset = 63;
4660 break;
4661 case AArch64::TAGPstack:
4662 Scale = TypeSize::getFixed(16);
4663 Width = TypeSize::getFixed(0);
4664 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4665 // of 63 (not 64!).
4666 MinOffset = -63;
4667 MaxOffset = 63;
4668 break;
4669 case AArch64::LDG:
4670 case AArch64::STGi:
4671 case AArch64::STGPreIndex:
4672 case AArch64::STGPostIndex:
4673 case AArch64::STZGi:
4674 case AArch64::STZGPreIndex:
4675 case AArch64::STZGPostIndex:
4676 Scale = TypeSize::getFixed(16);
4677 Width = TypeSize::getFixed(16);
4678 MinOffset = -256;
4679 MaxOffset = 255;
4680 break;
4681 // SVE
4682 case AArch64::STR_ZZZZXI:
4683 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4684 case AArch64::LDR_ZZZZXI:
4685 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4686 Scale = TypeSize::getScalable(16);
4687 Width = TypeSize::getScalable(16 * 4);
4688 MinOffset = -256;
4689 MaxOffset = 252;
4690 break;
4691 case AArch64::STR_ZZZXI:
4692 case AArch64::LDR_ZZZXI:
4693 Scale = TypeSize::getScalable(16);
4694 Width = TypeSize::getScalable(16 * 3);
4695 MinOffset = -256;
4696 MaxOffset = 253;
4697 break;
4698 case AArch64::STR_ZZXI:
4699 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4700 case AArch64::LDR_ZZXI:
4701 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4702 Scale = TypeSize::getScalable(16);
4703 Width = TypeSize::getScalable(16 * 2);
4704 MinOffset = -256;
4705 MaxOffset = 254;
4706 break;
4707 case AArch64::LDR_PXI:
4708 case AArch64::STR_PXI:
4709 Scale = TypeSize::getScalable(2);
4710 Width = TypeSize::getScalable(2);
4711 MinOffset = -256;
4712 MaxOffset = 255;
4713 break;
4714 case AArch64::LDR_PPXI:
4715 case AArch64::STR_PPXI:
4716 Scale = TypeSize::getScalable(2);
4717 Width = TypeSize::getScalable(2 * 2);
4718 MinOffset = -256;
4719 MaxOffset = 254;
4720 break;
4721 case AArch64::LDR_ZXI:
4722 case AArch64::STR_ZXI:
4723 Scale = TypeSize::getScalable(16);
4724 Width = TypeSize::getScalable(16);
4725 MinOffset = -256;
4726 MaxOffset = 255;
4727 break;
4728 case AArch64::LD1B_IMM:
4729 case AArch64::LD1H_IMM:
4730 case AArch64::LD1W_IMM:
4731 case AArch64::LD1D_IMM:
4732 case AArch64::LDNT1B_ZRI:
4733 case AArch64::LDNT1H_ZRI:
4734 case AArch64::LDNT1W_ZRI:
4735 case AArch64::LDNT1D_ZRI:
4736 case AArch64::ST1B_IMM:
4737 case AArch64::ST1H_IMM:
4738 case AArch64::ST1W_IMM:
4739 case AArch64::ST1D_IMM:
4740 case AArch64::STNT1B_ZRI:
4741 case AArch64::STNT1H_ZRI:
4742 case AArch64::STNT1W_ZRI:
4743 case AArch64::STNT1D_ZRI:
4744 case AArch64::LDNF1B_IMM:
4745 case AArch64::LDNF1H_IMM:
4746 case AArch64::LDNF1W_IMM:
4747 case AArch64::LDNF1D_IMM:
4748 // A full vector's worth of data
4749 // Width = mbytes * elements
4750 Scale = TypeSize::getScalable(16);
4751 Width = TypeSize::getScalable(16);
4752 MinOffset = -8;
4753 MaxOffset = 7;
4754 break;
4755 case AArch64::LD2B_IMM:
4756 case AArch64::LD2H_IMM:
4757 case AArch64::LD2W_IMM:
4758 case AArch64::LD2D_IMM:
4759 case AArch64::ST2B_IMM:
4760 case AArch64::ST2H_IMM:
4761 case AArch64::ST2W_IMM:
4762 case AArch64::ST2D_IMM:
4763 Scale = TypeSize::getScalable(32);
4764 Width = TypeSize::getScalable(16 * 2);
4765 MinOffset = -8;
4766 MaxOffset = 7;
4767 break;
4768 case AArch64::LD3B_IMM:
4769 case AArch64::LD3H_IMM:
4770 case AArch64::LD3W_IMM:
4771 case AArch64::LD3D_IMM:
4772 case AArch64::ST3B_IMM:
4773 case AArch64::ST3H_IMM:
4774 case AArch64::ST3W_IMM:
4775 case AArch64::ST3D_IMM:
4776 Scale = TypeSize::getScalable(48);
4777 Width = TypeSize::getScalable(16 * 3);
4778 MinOffset = -8;
4779 MaxOffset = 7;
4780 break;
4781 case AArch64::LD4B_IMM:
4782 case AArch64::LD4H_IMM:
4783 case AArch64::LD4W_IMM:
4784 case AArch64::LD4D_IMM:
4785 case AArch64::ST4B_IMM:
4786 case AArch64::ST4H_IMM:
4787 case AArch64::ST4W_IMM:
4788 case AArch64::ST4D_IMM:
4789 Scale = TypeSize::getScalable(64);
4790 Width = TypeSize::getScalable(16 * 4);
4791 MinOffset = -8;
4792 MaxOffset = 7;
4793 break;
4794 case AArch64::LD1B_H_IMM:
4795 case AArch64::LD1SB_H_IMM:
4796 case AArch64::LD1H_S_IMM:
4797 case AArch64::LD1SH_S_IMM:
4798 case AArch64::LD1W_D_IMM:
4799 case AArch64::LD1SW_D_IMM:
4800 case AArch64::ST1B_H_IMM:
4801 case AArch64::ST1H_S_IMM:
4802 case AArch64::ST1W_D_IMM:
4803 case AArch64::LDNF1B_H_IMM:
4804 case AArch64::LDNF1SB_H_IMM:
4805 case AArch64::LDNF1H_S_IMM:
4806 case AArch64::LDNF1SH_S_IMM:
4807 case AArch64::LDNF1W_D_IMM:
4808 case AArch64::LDNF1SW_D_IMM:
4809 // A half vector's worth of data
4810 // Width = mbytes * elements
4811 Scale = TypeSize::getScalable(8);
4812 Width = TypeSize::getScalable(8);
4813 MinOffset = -8;
4814 MaxOffset = 7;
4815 break;
4816 case AArch64::LD1B_S_IMM:
4817 case AArch64::LD1SB_S_IMM:
4818 case AArch64::LD1H_D_IMM:
4819 case AArch64::LD1SH_D_IMM:
4820 case AArch64::ST1B_S_IMM:
4821 case AArch64::ST1H_D_IMM:
4822 case AArch64::LDNF1B_S_IMM:
4823 case AArch64::LDNF1SB_S_IMM:
4824 case AArch64::LDNF1H_D_IMM:
4825 case AArch64::LDNF1SH_D_IMM:
4826 // A quarter vector's worth of data
4827 // Width = mbytes * elements
4828 Scale = TypeSize::getScalable(4);
4829 Width = TypeSize::getScalable(4);
4830 MinOffset = -8;
4831 MaxOffset = 7;
4832 break;
4833 case AArch64::LD1B_D_IMM:
4834 case AArch64::LD1SB_D_IMM:
4835 case AArch64::ST1B_D_IMM:
4836 case AArch64::LDNF1B_D_IMM:
4837 case AArch64::LDNF1SB_D_IMM:
4838 // An eighth of a vector's worth of data
4839 // Width = mbytes * elements
4840 Scale = TypeSize::getScalable(2);
4841 Width = TypeSize::getScalable(2);
4842 MinOffset = -8;
4843 MaxOffset = 7;
4844 break;
4845 case AArch64::ST2Gi:
4846 case AArch64::ST2GPreIndex:
4847 case AArch64::ST2GPostIndex:
4848 case AArch64::STZ2Gi:
4849 case AArch64::STZ2GPreIndex:
4850 case AArch64::STZ2GPostIndex:
4851 Scale = TypeSize::getFixed(16);
4852 Width = TypeSize::getFixed(32);
4853 MinOffset = -256;
4854 MaxOffset = 255;
4855 break;
4856 case AArch64::STGPi:
4857 case AArch64::STGPpost:
4858 case AArch64::STGPpre:
4859 Scale = TypeSize::getFixed(16);
4860 Width = TypeSize::getFixed(16);
4861 MinOffset = -64;
4862 MaxOffset = 63;
4863 break;
4864 case AArch64::LD1RB_IMM:
4865 case AArch64::LD1RB_H_IMM:
4866 case AArch64::LD1RB_S_IMM:
4867 case AArch64::LD1RB_D_IMM:
4868 case AArch64::LD1RSB_H_IMM:
4869 case AArch64::LD1RSB_S_IMM:
4870 case AArch64::LD1RSB_D_IMM:
4871 Scale = TypeSize::getFixed(1);
4872 Width = TypeSize::getFixed(1);
4873 MinOffset = 0;
4874 MaxOffset = 63;
4875 break;
4876 case AArch64::LD1RH_IMM:
4877 case AArch64::LD1RH_S_IMM:
4878 case AArch64::LD1RH_D_IMM:
4879 case AArch64::LD1RSH_S_IMM:
4880 case AArch64::LD1RSH_D_IMM:
4881 Scale = TypeSize::getFixed(2);
4882 Width = TypeSize::getFixed(2);
4883 MinOffset = 0;
4884 MaxOffset = 63;
4885 break;
4886 case AArch64::LD1RW_IMM:
4887 case AArch64::LD1RW_D_IMM:
4888 case AArch64::LD1RSW_IMM:
4889 Scale = TypeSize::getFixed(4);
4890 Width = TypeSize::getFixed(4);
4891 MinOffset = 0;
4892 MaxOffset = 63;
4893 break;
4894 case AArch64::LD1RD_IMM:
4895 Scale = TypeSize::getFixed(8);
4896 Width = TypeSize::getFixed(8);
4897 MinOffset = 0;
4898 MaxOffset = 63;
4899 break;
4900 }
4901
4902 return true;
4903}
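// For example, LDPXi reports Scale = 8 and an immediate range of [-64, 63],
// i.e. byte offsets from -512 to 504 in steps of 8, while the unscaled LDURXi
// reports Scale = 1 with byte offsets in [-256, 255].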
4904
4905// Scaling factor for unscaled load or store.
4906 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4907 switch (Opc) {
4908 default:
4909 llvm_unreachable("Opcode has unknown scale!");
4910 case AArch64::LDRBBui:
4911 case AArch64::LDURBBi:
4912 case AArch64::LDRSBWui:
4913 case AArch64::LDURSBWi:
4914 case AArch64::STRBBui:
4915 case AArch64::STURBBi:
4916 return 1;
4917 case AArch64::LDRHHui:
4918 case AArch64::LDURHHi:
4919 case AArch64::LDRSHWui:
4920 case AArch64::LDURSHWi:
4921 case AArch64::STRHHui:
4922 case AArch64::STURHHi:
4923 return 2;
4924 case AArch64::LDRSui:
4925 case AArch64::LDURSi:
4926 case AArch64::LDRSpre:
4927 case AArch64::LDRSWui:
4928 case AArch64::LDURSWi:
4929 case AArch64::LDRSWpre:
4930 case AArch64::LDRWpre:
4931 case AArch64::LDRWui:
4932 case AArch64::LDURWi:
4933 case AArch64::STRSui:
4934 case AArch64::STURSi:
4935 case AArch64::STRSpre:
4936 case AArch64::STRWui:
4937 case AArch64::STURWi:
4938 case AArch64::STRWpre:
4939 case AArch64::LDPSi:
4940 case AArch64::LDPSWi:
4941 case AArch64::LDPWi:
4942 case AArch64::STPSi:
4943 case AArch64::STPWi:
4944 return 4;
4945 case AArch64::LDRDui:
4946 case AArch64::LDURDi:
4947 case AArch64::LDRDpre:
4948 case AArch64::LDRXui:
4949 case AArch64::LDURXi:
4950 case AArch64::LDRXpre:
4951 case AArch64::STRDui:
4952 case AArch64::STURDi:
4953 case AArch64::STRDpre:
4954 case AArch64::STRXui:
4955 case AArch64::STURXi:
4956 case AArch64::STRXpre:
4957 case AArch64::LDPDi:
4958 case AArch64::LDPXi:
4959 case AArch64::STPDi:
4960 case AArch64::STPXi:
4961 return 8;
4962 case AArch64::LDRQui:
4963 case AArch64::LDURQi:
4964 case AArch64::STRQui:
4965 case AArch64::STURQi:
4966 case AArch64::STRQpre:
4967 case AArch64::LDPQi:
4968 case AArch64::LDRQpre:
4969 case AArch64::STPQi:
4970 case AArch64::STGi:
4971 case AArch64::STZGi:
4972 case AArch64::ST2Gi:
4973 case AArch64::STZ2Gi:
4974 case AArch64::STGPi:
4975 return 16;
4976 }
4977}
4978
4979 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4980 switch (MI.getOpcode()) {
4981 default:
4982 return false;
4983 case AArch64::LDRWpre:
4984 case AArch64::LDRXpre:
4985 case AArch64::LDRSWpre:
4986 case AArch64::LDRSpre:
4987 case AArch64::LDRDpre:
4988 case AArch64::LDRQpre:
4989 return true;
4990 }
4991}
4992
4993 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4994 switch (MI.getOpcode()) {
4995 default:
4996 return false;
4997 case AArch64::STRWpre:
4998 case AArch64::STRXpre:
4999 case AArch64::STRSpre:
5000 case AArch64::STRDpre:
5001 case AArch64::STRQpre:
5002 return true;
5003 }
5004}
5005
5006 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
5007 return isPreLd(MI) || isPreSt(MI);
5008}
5009
5010 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
5011 switch (MI.getOpcode()) {
5012 default:
5013 return false;
5014 case AArch64::LDPSi:
5015 case AArch64::LDPSWi:
5016 case AArch64::LDPDi:
5017 case AArch64::LDPQi:
5018 case AArch64::LDPWi:
5019 case AArch64::LDPXi:
5020 case AArch64::STPSi:
5021 case AArch64::STPDi:
5022 case AArch64::STPQi:
5023 case AArch64::STPWi:
5024 case AArch64::STPXi:
5025 case AArch64::STGPi:
5026 return true;
5027 }
5028}
5029
5030 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
5031 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5032 unsigned Idx =
5033 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
5034 : 1;
5035 return MI.getOperand(Idx);
5036}
5037
5038 const MachineOperand &
5039 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
5040 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5041 unsigned Idx =
5042 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
5043 : 2;
5044 return MI.getOperand(Idx);
5045}
5046
5047const MachineOperand &
5048 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
5049 switch (MI.getOpcode()) {
5050 default:
5051 llvm_unreachable("Unexpected opcode");
5052 case AArch64::LDRBroX:
5053 case AArch64::LDRBBroX:
5054 case AArch64::LDRSBXroX:
5055 case AArch64::LDRSBWroX:
5056 case AArch64::LDRHroX:
5057 case AArch64::LDRHHroX:
5058 case AArch64::LDRSHXroX:
5059 case AArch64::LDRSHWroX:
5060 case AArch64::LDRWroX:
5061 case AArch64::LDRSroX:
5062 case AArch64::LDRSWroX:
5063 case AArch64::LDRDroX:
5064 case AArch64::LDRXroX:
5065 case AArch64::LDRQroX:
5066 return MI.getOperand(4);
5067 }
5068}
5069
5070 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
5071 Register Reg) {
5072 if (MI.getParent() == nullptr)
5073 return nullptr;
5074 const MachineFunction *MF = MI.getParent()->getParent();
5075 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5076}
5077
5078 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
5079 auto IsHFPR = [&](const MachineOperand &Op) {
5080 if (!Op.isReg())
5081 return false;
5082 auto Reg = Op.getReg();
5083 if (Reg.isPhysical())
5084 return AArch64::FPR16RegClass.contains(Reg);
5085 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5086 return TRC == &AArch64::FPR16RegClass ||
5087 TRC == &AArch64::FPR16_loRegClass;
5088 };
5089 return llvm::any_of(MI.operands(), IsHFPR);
5090}
5091
5092 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
5093 auto IsQFPR = [&](const MachineOperand &Op) {
5094 if (!Op.isReg())
5095 return false;
5096 auto Reg = Op.getReg();
5097 if (Reg.isPhysical())
5098 return AArch64::FPR128RegClass.contains(Reg);
5099 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5100 return TRC == &AArch64::FPR128RegClass ||
5101 TRC == &AArch64::FPR128_loRegClass;
5102 };
5103 return llvm::any_of(MI.operands(), IsQFPR);
5104}
5105
5107 switch (MI.getOpcode()) {
5108 case AArch64::BRK:
5109 case AArch64::HLT:
5110 case AArch64::PACIASP:
5111 case AArch64::PACIBSP:
5112 // Implicit BTI behavior.
5113 return true;
5114 case AArch64::PAUTH_PROLOGUE:
5115 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5116 return true;
5117 case AArch64::HINT: {
5118 unsigned Imm = MI.getOperand(0).getImm();
5119 // Explicit BTI instruction.
5120 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5121 return true;
5122 // PACI(A|B)SP instructions.
5123 if (Imm == 25 || Imm == 27)
5124 return true;
5125 return false;
5126 }
5127 default:
5128 return false;
5129 }
5130}
5131
5132 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
5133 if (Reg == 0)
5134 return false;
5135 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5136 return AArch64::FPR128RegClass.contains(Reg) ||
5137 AArch64::FPR64RegClass.contains(Reg) ||
5138 AArch64::FPR32RegClass.contains(Reg) ||
5139 AArch64::FPR16RegClass.contains(Reg) ||
5140 AArch64::FPR8RegClass.contains(Reg);
5141}
5142
5143 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
5144 auto IsFPR = [&](const MachineOperand &Op) {
5145 if (!Op.isReg())
5146 return false;
5147 auto Reg = Op.getReg();
5148 if (Reg.isPhysical())
5149 return isFpOrNEON(Reg);
5150
5151 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5152 return TRC == &AArch64::FPR128RegClass ||
5153 TRC == &AArch64::FPR128_loRegClass ||
5154 TRC == &AArch64::FPR64RegClass ||
5155 TRC == &AArch64::FPR64_loRegClass ||
5156 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5157 TRC == &AArch64::FPR8RegClass;
5158 };
5159 return llvm::any_of(MI.operands(), IsFPR);
5160}
5161
5162// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5163// scaled.
5164static bool scaleOffset(unsigned Opc, int64_t &Offset) {
5165 int Scale = AArch64InstrInfo::getMemScale(Opc);
5166
5167 // If the byte-offset isn't a multiple of the stride, we can't scale this
5168 // offset.
5169 if (Offset % Scale != 0)
5170 return false;
5171
5172 // Convert the byte-offset used by unscaled into an "element" offset used
5173 // by the scaled pair load/store instructions.
5174 Offset /= Scale;
5175 return true;
5176}
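// For example, with Opc = LDURXi (memory scale 8) a byte offset of 16 becomes
// the element offset 2, while a byte offset of 12 cannot be scaled and the
// candidate pair is rejected.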
5177
5178static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5179 if (FirstOpc == SecondOpc)
5180 return true;
5181 // We can also pair sign-ext and zero-ext instructions.
5182 switch (FirstOpc) {
5183 default:
5184 return false;
5185 case AArch64::STRSui:
5186 case AArch64::STURSi:
5187 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5188 case AArch64::STRDui:
5189 case AArch64::STURDi:
5190 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5191 case AArch64::STRQui:
5192 case AArch64::STURQi:
5193 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5194 case AArch64::STRWui:
5195 case AArch64::STURWi:
5196 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5197 case AArch64::STRXui:
5198 case AArch64::STURXi:
5199 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5200 case AArch64::LDRSui:
5201 case AArch64::LDURSi:
5202 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5203 case AArch64::LDRDui:
5204 case AArch64::LDURDi:
5205 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5206 case AArch64::LDRQui:
5207 case AArch64::LDURQi:
5208 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5209 case AArch64::LDRWui:
5210 case AArch64::LDURWi:
5211 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5212 case AArch64::LDRSWui:
5213 case AArch64::LDURSWi:
5214 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5215 case AArch64::LDRXui:
5216 case AArch64::LDURXi:
5217 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5218 }
5219 // These instructions can't be paired based on their opcodes.
5220 return false;
5221}
5222
5223static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5224 int64_t Offset1, unsigned Opcode1, int FI2,
5225 int64_t Offset2, unsigned Opcode2) {
5226 // Accesses through fixed stack object frame indices may access a different
5227 // fixed stack slot. Check that the object offsets + offsets match.
5228 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5229 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5230 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5231 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5232 // Convert to scaled object offsets.
5233 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5234 if (ObjectOffset1 % Scale1 != 0)
5235 return false;
5236 ObjectOffset1 /= Scale1;
5237 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5238 if (ObjectOffset2 % Scale2 != 0)
5239 return false;
5240 ObjectOffset2 /= Scale2;
5241 ObjectOffset1 += Offset1;
5242 ObjectOffset2 += Offset2;
5243 return ObjectOffset1 + 1 == ObjectOffset2;
5244 }
5245
5246 return FI1 == FI2;
5247}
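// For example, two LDRXui accesses to fixed stack objects at object offsets 0
// and 8, each with an instruction offset of 0, scale to 0 and 1 respectively;
// they are therefore adjacent and may be clustered into an LDP.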
5248
5249/// Detect opportunities for ldp/stp formation.
5250///
5251/// Only called for LdSt for which getMemOperandWithOffset returns true.
5252 bool AArch64InstrInfo::shouldClusterMemOps(
5253 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5254 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5255 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5256 unsigned NumBytes) const {
5257 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5258 const MachineOperand &BaseOp1 = *BaseOps1.front();
5259 const MachineOperand &BaseOp2 = *BaseOps2.front();
5260 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5261 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
5262 if (BaseOp1.getType() != BaseOp2.getType())
5263 return false;
5264
5265 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5266 "Only base registers and frame indices are supported.");
5267
5268 // Check for both base regs and base FI.
5269 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5270 return false;
5271
5272 // Only cluster up to a single pair.
5273 if (ClusterSize > 2)
5274 return false;
5275
5276 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5277 return false;
5278
5279 // Can we pair these instructions based on their opcodes?
5280 unsigned FirstOpc = FirstLdSt.getOpcode();
5281 unsigned SecondOpc = SecondLdSt.getOpcode();
5282 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5283 return false;
5284
5285 // Can't merge volatiles or load/stores that have a hint to avoid pair
5286 // formation, for example.
5287 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5288 !isCandidateToMergeOrPair(SecondLdSt))
5289 return false;
5290
5291 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
5292 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5293 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5294 return false;
5295
5296 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5297 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5298 return false;
5299
5300 // Pairwise instructions have a 7-bit signed offset field.
5301 if (Offset1 > 63 || Offset1 < -64)
5302 return false;
5303
5304 // The caller should already have ordered First/SecondLdSt by offset.
5305 // Note: except for non-equal frame index bases
5306 if (BaseOp1.isFI()) {
5307 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5308 "Caller should have ordered offsets.");
5309
5310 const MachineFrameInfo &MFI =
5311 FirstLdSt.getParent()->getParent()->getFrameInfo();
5312 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5313 BaseOp2.getIndex(), Offset2, SecondOpc);
5314 }
5315
5316 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5317
5318 return Offset1 + 1 == Offset2;
5319}
5320
5321 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5322 MCRegister Reg, unsigned SubIdx,
5323 unsigned State,
5324 const TargetRegisterInfo *TRI) {
5325 if (!SubIdx)
5326 return MIB.addReg(Reg, State);
5327
5328 if (Reg.isPhysical())
5329 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5330 return MIB.addReg(Reg, State, SubIdx);
5331}
5332
5333static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5334 unsigned NumRegs) {
5335 // We really want the positive remainder mod 32 here; that happens to be
5336 // easily obtainable with a mask.
5337 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5338}
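// For example, when copying the tuple D0_D1 into D1_D2 (encodings 0 and 1,
// NumRegs = 2), (1 - 0) & 0x1f = 1 < 2, so a forward sub-register copy would
// overwrite D1 before it is read; copyPhysRegTuple therefore copies the
// sub-registers in reverse order in that case.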
5339
5340 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5341 MachineBasicBlock::iterator I,
5342 const DebugLoc &DL, MCRegister DestReg,
5343 MCRegister SrcReg, bool KillSrc,
5344 unsigned Opcode,
5345 ArrayRef<unsigned> Indices) const {
5346 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5347 const TargetRegisterInfo *TRI = &getRegisterInfo();
5348 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5349 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5350 unsigned NumRegs = Indices.size();
5351
5352 int SubReg = 0, End = NumRegs, Incr = 1;
5353 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5354 SubReg = NumRegs - 1;
5355 End = -1;
5356 Incr = -1;
5357 }
5358
5359 for (; SubReg != End; SubReg += Incr) {
5360 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5361 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5362 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5363 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5364 }
5365}
5366
5367 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5368 MachineBasicBlock::iterator I,
5369 const DebugLoc &DL, MCRegister DestReg,
5370 MCRegister SrcReg, bool KillSrc,
5371 unsigned Opcode, unsigned ZeroReg,
5372 llvm::ArrayRef<unsigned> Indices) const {
5373 const TargetRegisterInfo *TRI = &getRegisterInfo();
5374 unsigned NumRegs = Indices.size();
5375
5376#ifndef NDEBUG
5377 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5378 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5379 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5380 "GPR reg sequences should not be able to overlap");
5381#endif
5382
5383 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5384 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5385 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5386 MIB.addReg(ZeroReg);
5387 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5388 MIB.addImm(0);
5389 }
5390}
5391
5392/// Returns true if the instruction at I is in a streaming call site region,
5393/// within a single basic block.
5394/// A "call site streaming region" starts after smstart and ends at smstop
5395/// around a call to a streaming function. This walks backward from I.
5398 MachineFunction &MF = *MBB.getParent();
5400 if (!AFI->hasStreamingModeChanges())
5401 return false;
5402 // Walk backwards to find smstart/smstop
5403 for (MachineInstr &MI : reverse(make_range(MBB.begin(), I))) {
5404 unsigned Opc = MI.getOpcode();
5405 if (Opc == AArch64::MSRpstatesvcrImm1 || Opc == AArch64::MSRpstatePseudo) {
5406 // Check if this is SM change (not ZA)
5407 int64_t PState = MI.getOperand(0).getImm();
5408 if (PState == AArch64SVCR::SVCRSM || PState == AArch64SVCR::SVCRSMZA) {
5409 // Operand 1 is 1 for start, 0 for stop
5410 return MI.getOperand(1).getImm() == 1;
5411 }
5412 }
5413 }
5414 return false;
5415}
5416
5417/// Returns true if in a streaming call site region without SME-FA64.
5418static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget,
5419 MachineBasicBlock &MBB,
5420 MachineBasicBlock::iterator I) {
5421 return !Subtarget.hasSMEFA64() && isInStreamingCallSiteRegion(MBB, I);
5422}
5423
5424 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5425 MachineBasicBlock::iterator I,
5426 const DebugLoc &DL, Register DestReg,
5427 Register SrcReg, bool KillSrc,
5428 bool RenamableDest,
5429 bool RenamableSrc) const {
5430 ++NumCopyInstrs;
5431 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5432 AArch64::GPR32spRegClass.contains(SrcReg)) {
5433 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5434 // If either operand is WSP, expand to ADD #0.
5435 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5436 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5437 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5438 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5439 &AArch64::GPR64spRegClass);
5440 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5441 &AArch64::GPR64spRegClass);
5442 // This instruction is reading and writing X registers. This may upset
5443 // the register scavenger and machine verifier, so we need to indicate
5444 // that we are reading an undefined value from SrcRegX, but a proper
5445 // value from SrcReg.
5446 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5447 .addReg(SrcRegX, RegState::Undef)
5448 .addImm(0)
5449 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5450 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5451 ++NumZCRegMoveInstrsGPR;
5452 } else {
5453 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5454 .addReg(SrcReg, getKillRegState(KillSrc))
5455 .addImm(0)
5456 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5457 if (Subtarget.hasZeroCycleRegMoveGPR32())
5458 ++NumZCRegMoveInstrsGPR;
5459 }
5460 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5461 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5462 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5463 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5464 &AArch64::GPR64spRegClass);
5465 assert(DestRegX.isValid() && "Destination super-reg not valid");
5466 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5467 &AArch64::GPR64spRegClass);
5468 assert(SrcRegX.isValid() && "Source super-reg not valid");
5469 // This instruction is reading and writing X registers. This may upset
5470 // the register scavenger and machine verifier, so we need to indicate
5471 // that we are reading an undefined value from SrcRegX, but a proper
5472 // value from SrcReg.
5473 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5474 .addReg(AArch64::XZR)
5475 .addReg(SrcRegX, RegState::Undef)
5476 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5477 ++NumZCRegMoveInstrsGPR;
5478 } else {
5479 // Otherwise, expand to ORR WZR.
5480 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5481 .addReg(AArch64::WZR)
5482 .addReg(SrcReg, getKillRegState(KillSrc));
5483 if (Subtarget.hasZeroCycleRegMoveGPR32())
5484 ++NumZCRegMoveInstrsGPR;
5485 }
5486 return;
5487 }
5488
5489 // GPR32 zeroing
5490 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5491 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5492 !Subtarget.hasZeroCycleZeroingGPR32()) {
5493 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5494 &AArch64::GPR64spRegClass);
5495 assert(DestRegX.isValid() && "Destination super-reg not valid");
5496 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5497 .addImm(0)
5499 ++NumZCZeroingInstrsGPR;
5500 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5501 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5502 .addImm(0)
5503 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5504 ++NumZCZeroingInstrsGPR;
5505 } else {
5506 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5507 .addReg(AArch64::WZR)
5508 .addReg(AArch64::WZR);
5509 }
5510 return;
5511 }
5512
5513 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5514 AArch64::GPR64spRegClass.contains(SrcReg)) {
5515 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5516 // If either operand is SP, expand to ADD #0.
5517 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5518 .addReg(SrcReg, getKillRegState(KillSrc))
5519 .addImm(0)
5520 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5521 if (Subtarget.hasZeroCycleRegMoveGPR64())
5522 ++NumZCRegMoveInstrsGPR;
5523 } else {
5524 // Otherwise, expand to ORR XZR.
5525 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5526 .addReg(AArch64::XZR)
5527 .addReg(SrcReg, getKillRegState(KillSrc));
5528 if (Subtarget.hasZeroCycleRegMoveGPR64())
5529 ++NumZCRegMoveInstrsGPR;
5530 }
5531 return;
5532 }
5533
5534 // GPR64 zeroing
5535 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5536 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5537 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5538 .addImm(0)
5539 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5540 ++NumZCZeroingInstrsGPR;
5541 } else {
5542 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5543 .addReg(AArch64::XZR)
5544 .addReg(AArch64::XZR);
5545 }
5546 return;
5547 }
5548
5549 // Copy a Predicate register by ORRing with itself.
5550 if (AArch64::PPRRegClass.contains(DestReg) &&
5551 AArch64::PPRRegClass.contains(SrcReg)) {
5552 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5553 "Unexpected SVE register.");
5554 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5555 .addReg(SrcReg) // Pg
5556 .addReg(SrcReg)
5557 .addReg(SrcReg, getKillRegState(KillSrc));
5558 return;
5559 }
5560
5561 // Copy a predicate-as-counter register by ORRing with itself as if it
5562 // were a regular predicate (mask) register.
5563 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5564 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5565 if (DestIsPNR || SrcIsPNR) {
5566 auto ToPPR = [](MCRegister R) -> MCRegister {
5567 return (R - AArch64::PN0) + AArch64::P0;
5568 };
5569 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5570 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5571
5572 if (PPRSrcReg != PPRDestReg) {
5573 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5574 .addReg(PPRSrcReg) // Pg
5575 .addReg(PPRSrcReg)
5576 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5577 if (DestIsPNR)
5578 NewMI.addDef(DestReg, RegState::Implicit);
5579 }
5580 return;
5581 }
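 // For example, "$pn8 = COPY $pn9" is rewritten here as if it were a plain
 // predicate copy, roughly "orr p8.b, p9/z, p9.b, p9.b", with an extra
 // implicit def of $pn8 so the counter register itself is still marked as
 // defined.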
5582
5583 // Copy a Z register by ORRing with itself.
5584 if (AArch64::ZPRRegClass.contains(DestReg) &&
5585 AArch64::ZPRRegClass.contains(SrcReg)) {
5586 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5587 "Unexpected SVE register.");
5588 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5589 .addReg(SrcReg)
5590 .addReg(SrcReg, getKillRegState(KillSrc));
5591 return;
5592 }
5593
5594 // Copy a Z register pair by copying the individual sub-registers.
5595 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5596 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5597 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5598 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5599 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5600 "Unexpected SVE register.");
5601 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5602 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5603 Indices);
5604 return;
5605 }
5606
5607 // Copy a Z register triple by copying the individual sub-registers.
5608 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5609 AArch64::ZPR3RegClass.contains(SrcReg)) {
5610 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5611 "Unexpected SVE register.");
5612 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5613 AArch64::zsub2};
5614 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5615 Indices);
5616 return;
5617 }
5618
5619 // Copy a Z register quad by copying the individual sub-registers.
5620 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5621 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5622 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5623 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5624 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5625 "Unexpected SVE register.");
5626 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5627 AArch64::zsub2, AArch64::zsub3};
5628 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5629 Indices);
5630 return;
5631 }
5632
5633 // Copy a DDDD register quad by copying the individual sub-registers.
5634 if (AArch64::DDDDRegClass.contains(DestReg) &&
5635 AArch64::DDDDRegClass.contains(SrcReg)) {
5636 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5637 AArch64::dsub2, AArch64::dsub3};
5638 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5639 Indices);
5640 return;
5641 }
5642
5643 // Copy a DDD register triple by copying the individual sub-registers.
5644 if (AArch64::DDDRegClass.contains(DestReg) &&
5645 AArch64::DDDRegClass.contains(SrcReg)) {
5646 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5647 AArch64::dsub2};
5648 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5649 Indices);
5650 return;
5651 }
5652
5653 // Copy a DD register pair by copying the individual sub-registers.
5654 if (AArch64::DDRegClass.contains(DestReg) &&
5655 AArch64::DDRegClass.contains(SrcReg)) {
5656 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5657 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5658 Indices);
5659 return;
5660 }
5661
5662 // Copy a QQQQ register quad by copying the individual sub-registers.
5663 if (AArch64::QQQQRegClass.contains(DestReg) &&
5664 AArch64::QQQQRegClass.contains(SrcReg)) {
5665 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5666 AArch64::qsub2, AArch64::qsub3};
5667 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5668 Indices);
5669 return;
5670 }
5671
5672 // Copy a QQQ register triple by copying the individual sub-registers.
5673 if (AArch64::QQQRegClass.contains(DestReg) &&
5674 AArch64::QQQRegClass.contains(SrcReg)) {
5675 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5676 AArch64::qsub2};
5677 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5678 Indices);
5679 return;
5680 }
5681
5682 // Copy a QQ register pair by copying the individual sub-registers.
5683 if (AArch64::QQRegClass.contains(DestReg) &&
5684 AArch64::QQRegClass.contains(SrcReg)) {
5685 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5686 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5687 Indices);
5688 return;
5689 }
5690
5691 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5692 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5693 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5694 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5695 AArch64::XZR, Indices);
5696 return;
5697 }
5698
5699 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5700 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5701 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5702 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5703 AArch64::WZR, Indices);
5704 return;
5705 }
5706
5707 if (AArch64::FPR128RegClass.contains(DestReg) &&
5708 AArch64::FPR128RegClass.contains(SrcReg)) {
5709 // In streaming regions, NEON is illegal but streaming-SVE is available.
5710 // Use SVE for copies if we're in a streaming region and SME is available.
5711 // With +sme-fa64, NEON is legal in streaming mode so we can use it.
5712 if ((Subtarget.isSVEorStreamingSVEAvailable() &&
5713 !Subtarget.isNeonAvailable()) ||
5714 mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5715 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5716 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5717 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5718 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5719 } else if (Subtarget.isNeonAvailable()) {
5720 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5721 .addReg(SrcReg)
5722 .addReg(SrcReg, getKillRegState(KillSrc));
5723 if (Subtarget.hasZeroCycleRegMoveFPR128())
5724 ++NumZCRegMoveInstrsFPR;
5725 } else {
5726 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5727 .addReg(AArch64::SP, RegState::Define)
5728 .addReg(SrcReg, getKillRegState(KillSrc))
5729 .addReg(AArch64::SP)
5730 .addImm(-16);
5731 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5732 .addReg(AArch64::SP, RegState::Define)
5733 .addReg(DestReg, RegState::Define)
5734 .addReg(AArch64::SP)
5735 .addImm(16);
5736 }
5737 return;
5738 }
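 // In short, a 128-bit copy like "$q0 = COPY $q1" becomes "mov v0.16b, v1.16b"
 // when NEON may be used, an SVE "orr z0.d, z1.d, z1.d" when only (streaming)
 // SVE is legal, and otherwise the value is bounced through the stack with a
 // pre-indexed STR / post-indexed LDR pair as a last resort.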
5739
5740 if (AArch64::FPR64RegClass.contains(DestReg) &&
5741 AArch64::FPR64RegClass.contains(SrcReg)) {
5742 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5743 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5744 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5745 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5746 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5747 &AArch64::FPR128RegClass);
5748 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5749 &AArch64::FPR128RegClass);
5750 // This instruction is reading and writing Q registers. This may upset
5751 // the register scavenger and machine verifier, so we need to indicate
5752 // that we are reading an undefined value from SrcRegQ, but a proper
5753 // value from SrcReg.
5754 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5755 .addReg(SrcRegQ, RegState::Undef)
5756 .addReg(SrcRegQ, RegState::Undef)
5757 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5758 ++NumZCRegMoveInstrsFPR;
5759 } else {
5760 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5761 .addReg(SrcReg, getKillRegState(KillSrc));
5762 if (Subtarget.hasZeroCycleRegMoveFPR64())
5763 ++NumZCRegMoveInstrsFPR;
5764 }
5765 return;
5766 }
5767
5768 if (AArch64::FPR32RegClass.contains(DestReg) &&
5769 AArch64::FPR32RegClass.contains(SrcReg)) {
5770 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5771 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5772 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5773 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5774 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5775 &AArch64::FPR128RegClass);
5776 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5777 &AArch64::FPR128RegClass);
5778 // This instruction is reading and writing Q registers. This may upset
5779 // the register scavenger and machine verifier, so we need to indicate
5780 // that we are reading an undefined value from SrcRegQ, but a proper
5781 // value from SrcReg.
5782 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5783 .addReg(SrcRegQ, RegState::Undef)
5784 .addReg(SrcRegQ, RegState::Undef)
5785 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5786 ++NumZCRegMoveInstrsFPR;
5787 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5788 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5789 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5790 &AArch64::FPR64RegClass);
5791 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5792 &AArch64::FPR64RegClass);
5793 // This instruction is reading and writing D registers. This may upset
5794 // the register scavenger and machine verifier, so we need to indicate
5795 // that we are reading an undefined value from SrcRegD, but a proper
5796 // value from SrcReg.
5797 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5798 .addReg(SrcRegD, RegState::Undef)
5799 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5800 ++NumZCRegMoveInstrsFPR;
5801 } else {
5802 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5803 .addReg(SrcReg, getKillRegState(KillSrc));
5804 if (Subtarget.hasZeroCycleRegMoveFPR32())
5805 ++NumZCRegMoveInstrsFPR;
5806 }
5807 return;
5808 }
5809
5810 if (AArch64::FPR16RegClass.contains(DestReg) &&
5811 AArch64::FPR16RegClass.contains(SrcReg)) {
5812 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5813 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5814 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5815 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5816 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5817 &AArch64::FPR128RegClass);
5818 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5819 &AArch64::FPR128RegClass);
5820 // This instruction is reading and writing Q registers. This may upset
5821 // the register scavenger and machine verifier, so we need to indicate
5822 // that we are reading an undefined value from SrcRegQ, but a proper
5823 // value from SrcReg.
5824 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5825 .addReg(SrcRegQ, RegState::Undef)
5826 .addReg(SrcRegQ, RegState::Undef)
5827 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5828 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5829 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5830 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5831 &AArch64::FPR64RegClass);
5832 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5833 &AArch64::FPR64RegClass);
5834 // This instruction is reading and writing D registers. This may upset
5835 // the register scavenger and machine verifier, so we need to indicate
5836 // that we are reading an undefined value from SrcRegD, but a proper
5837 // value from SrcReg.
5838 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5839 .addReg(SrcRegD, RegState::Undef)
5840 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5841 } else {
5842 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5843 &AArch64::FPR32RegClass);
5844 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5845 &AArch64::FPR32RegClass);
5846 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5847 .addReg(SrcReg, getKillRegState(KillSrc));
5848 }
5849 return;
5850 }
5851
5852 if (AArch64::FPR8RegClass.contains(DestReg) &&
5853 AArch64::FPR8RegClass.contains(SrcReg)) {
5854 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5855 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5856 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5857 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5858 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5859 &AArch64::FPR128RegClass);
5860 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5861 &AArch64::FPR128RegClass);
5862 // This instruction is reading and writing Q registers. This may upset
5863 // the register scavenger and machine verifier, so we need to indicate
5864 // that we are reading an undefined value from SrcRegQ, but a proper
5865 // value from SrcReg.
5866 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5867 .addReg(SrcRegQ, RegState::Undef)
5868 .addReg(SrcRegQ, RegState::Undef)
5869 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5870 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5871 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5872 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5873 &AArch64::FPR64RegClass);
5874 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5875 &AArch64::FPR64RegClass);
5876 // This instruction is reading and writing D registers. This may upset
5877 // the register scavenger and machine verifier, so we need to indicate
5878 // that we are reading an undefined value from SrcRegD, but a proper
5879 // value from SrcReg.
5880 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5881 .addReg(SrcRegD, RegState::Undef)
5882 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5883 } else {
5884 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5885 &AArch64::FPR32RegClass);
5886 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5887 &AArch64::FPR32RegClass);
5888 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5889 .addReg(SrcReg, getKillRegState(KillSrc));
5890 }
5891 return;
5892 }
5893
5894 // Copies between GPR64 and FPR64.
5895 if (AArch64::FPR64RegClass.contains(DestReg) &&
5896 AArch64::GPR64RegClass.contains(SrcReg)) {
5897 if (AArch64::XZR == SrcReg) {
5898 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5899 } else {
5900 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5901 .addReg(SrcReg, getKillRegState(KillSrc));
5902 }
5903 return;
5904 }
5905 if (AArch64::GPR64RegClass.contains(DestReg) &&
5906 AArch64::FPR64RegClass.contains(SrcReg)) {
5907 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5908 .addReg(SrcReg, getKillRegState(KillSrc));
5909 return;
5910 }
5911 // Copies between GPR32 and FPR32.
5912 if (AArch64::FPR32RegClass.contains(DestReg) &&
5913 AArch64::GPR32RegClass.contains(SrcReg)) {
5914 if (AArch64::WZR == SrcReg) {
5915 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5916 } else {
5917 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5918 .addReg(SrcReg, getKillRegState(KillSrc));
5919 }
5920 return;
5921 }
5922 if (AArch64::GPR32RegClass.contains(DestReg) &&
5923 AArch64::FPR32RegClass.contains(SrcReg)) {
5924 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5925 .addReg(SrcReg, getKillRegState(KillSrc));
5926 return;
5927 }
5928
5929 if (DestReg == AArch64::NZCV) {
5930 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5931 BuildMI(MBB, I, DL, get(AArch64::MSR))
5932 .addImm(AArch64SysReg::NZCV)
5933 .addReg(SrcReg, getKillRegState(KillSrc))
5934 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5935 return;
5936 }
5937
5938 if (SrcReg == AArch64::NZCV) {
5939 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5940 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5941 .addImm(AArch64SysReg::NZCV)
5942 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5943 return;
5944 }
5945
5946#ifndef NDEBUG
5947 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5948 << "\n";
5949#endif
5950 llvm_unreachable("unimplemented reg-to-reg copy");
5951}
5952
5955 MachineBasicBlock::iterator InsertBefore,
5956 const MCInstrDesc &MCID,
5957 Register SrcReg, bool IsKill,
5958 unsigned SubIdx0, unsigned SubIdx1, int FI,
5959 MachineMemOperand *MMO) {
5960 Register SrcReg0 = SrcReg;
5961 Register SrcReg1 = SrcReg;
5962 if (SrcReg.isPhysical()) {
5963 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5964 SubIdx0 = 0;
5965 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5966 SubIdx1 = 0;
5967 }
5968 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5969 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5970 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5971 .addFrameIndex(FI)
5972 .addImm(0)
5973 .addMemOperand(MMO);
5974}
5975
5976void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5977 MachineBasicBlock::iterator MBBI,
5978 Register SrcReg, bool isKill, int FI,
5979 const TargetRegisterClass *RC,
5980 Register VReg,
5981 MachineInstr::MIFlag Flags) const {
5982 MachineFunction &MF = *MBB.getParent();
5983 MachineFrameInfo &MFI = MF.getFrameInfo();
5984
5985 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5986 MachineMemOperand *MMO =
5987 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
5988 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5989 unsigned Opc = 0;
5990 bool Offset = true;
5991 Register PNRReg = MCRegister::NoRegister;
5992 unsigned StackID = TargetStackID::Default;
5993 switch (RI.getSpillSize(*RC)) {
5994 case 1:
5995 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5996 Opc = AArch64::STRBui;
5997 break;
5998 case 2: {
5999 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6000 Opc = AArch64::STRHui;
6001 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
6002 AArch64::PPRRegClass.hasSubClassEq(RC)) {
6003 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6004 "Unexpected register store without SVE store instructions");
6005 Opc = AArch64::STR_PXI;
6007 }
6008 break;
6009 }
6010 case 4:
6011 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6012 Opc = AArch64::STRWui;
6013 if (SrcReg.isVirtual())
6014 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
6015 else
6016 assert(SrcReg != AArch64::WSP);
6017 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6018 Opc = AArch64::STRSui;
6019 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6020 Opc = AArch64::STR_PPXI;
6022 }
6023 break;
6024 case 8:
6025 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6026 Opc = AArch64::STRXui;
6027 if (SrcReg.isVirtual())
6028 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6029 else
6030 assert(SrcReg != AArch64::SP);
6031 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6032 Opc = AArch64::STRDui;
6033 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6035 get(AArch64::STPWi), SrcReg, isKill,
6036 AArch64::sube32, AArch64::subo32, FI, MMO);
6037 return;
6038 }
6039 break;
6040 case 16:
6041 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6042 Opc = AArch64::STRQui;
6043 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6044 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6045 Opc = AArch64::ST1Twov1d;
6046 Offset = false;
6047 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6049 get(AArch64::STPXi), SrcReg, isKill,
6050 AArch64::sube64, AArch64::subo64, FI, MMO);
6051 return;
6052 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6053 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6054 "Unexpected register store without SVE store instructions");
6055 Opc = AArch64::STR_ZXI;
6057 }
6058 break;
6059 case 24:
6060 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6061 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6062 Opc = AArch64::ST1Threev1d;
6063 Offset = false;
6064 }
6065 break;
6066 case 32:
6067 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6068 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6069 Opc = AArch64::ST1Fourv1d;
6070 Offset = false;
6071 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6072 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6073 Opc = AArch64::ST1Twov2d;
6074 Offset = false;
6075 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6076 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6077 "Unexpected register store without SVE store instructions");
6078 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
6080 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6081 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6082 "Unexpected register store without SVE store instructions");
6083 Opc = AArch64::STR_ZZXI;
6085 }
6086 break;
6087 case 48:
6088 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6089 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6090 Opc = AArch64::ST1Threev2d;
6091 Offset = false;
6092 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6093 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6094 "Unexpected register store without SVE store instructions");
6095 Opc = AArch64::STR_ZZZXI;
6097 }
6098 break;
6099 case 64:
6100 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6101 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6102 Opc = AArch64::ST1Fourv2d;
6103 Offset = false;
6104 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6105 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6106 "Unexpected register store without SVE store instructions");
6107 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
6109 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6110 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6111 "Unexpected register store without SVE store instructions");
6112 Opc = AArch64::STR_ZZZZXI;
6114 }
6115 break;
6116 }
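 // At this point Opc names the store instruction chosen for the register
 // class: for example, a virtual GPR64 spill uses STRXui against the frame
 // index, while an SVE ZPR2 pair uses STR_ZZXI together with a scalable stack
 // ID so the slot's offset is expressed in vscale-scaled bytes.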
6117 assert(Opc && "Unknown register class");
6118 MFI.setStackID(FI, StackID);
6119
6120 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
6121 .addReg(SrcReg, getKillRegState(isKill))
6122 .addFrameIndex(FI);
6123
6124 if (Offset)
6125 MI.addImm(0);
6126 if (PNRReg.isValid())
6127 MI.addDef(PNRReg, RegState::Implicit);
6128 MI.addMemOperand(MMO);
6129}
6130
6133 MachineBasicBlock::iterator InsertBefore,
6134 const MCInstrDesc &MCID,
6135 Register DestReg, unsigned SubIdx0,
6136 unsigned SubIdx1, int FI,
6137 MachineMemOperand *MMO) {
6138 Register DestReg0 = DestReg;
6139 Register DestReg1 = DestReg;
6140 bool IsUndef = true;
6141 if (DestReg.isPhysical()) {
6142 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6143 SubIdx0 = 0;
6144 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6145 SubIdx1 = 0;
6146 IsUndef = false;
6147 }
6148 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6149 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6150 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6151 .addFrameIndex(FI)
6152 .addImm(0)
6153 .addMemOperand(MMO);
6154}
6155
6156void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
6157 MachineBasicBlock::iterator MBBI,
6158 Register DestReg, int FI,
6159 const TargetRegisterClass *RC,
6160 Register VReg,
6161 MachineInstr::MIFlag Flags) const {
6162 MachineFunction &MF = *MBB.getParent();
6163 MachineFrameInfo &MFI = MF.getFrameInfo();
6164 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
6165 MachineMemOperand *MMO =
6166 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
6167 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6168
6169 unsigned Opc = 0;
6170 bool Offset = true;
6171 unsigned StackID = TargetStackID::Default;
6172 Register PNRReg = MCRegister::NoRegister;
6173 switch (TRI.getSpillSize(*RC)) {
6174 case 1:
6175 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6176 Opc = AArch64::LDRBui;
6177 break;
6178 case 2: {
6179 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6180 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6181 Opc = AArch64::LDRHui;
6182 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6183 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6184 "Unexpected register load without SVE load instructions");
6185 if (IsPNR)
6186 PNRReg = DestReg;
6187 Opc = AArch64::LDR_PXI;
6189 }
6190 break;
6191 }
6192 case 4:
6193 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6194 Opc = AArch64::LDRWui;
6195 if (DestReg.isVirtual())
6196 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6197 else
6198 assert(DestReg != AArch64::WSP);
6199 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6200 Opc = AArch64::LDRSui;
6201 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6202 Opc = AArch64::LDR_PPXI;
6204 }
6205 break;
6206 case 8:
6207 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6208 Opc = AArch64::LDRXui;
6209 if (DestReg.isVirtual())
6210 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6211 else
6212 assert(DestReg != AArch64::SP);
6213 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6214 Opc = AArch64::LDRDui;
6215 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6217 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6218 AArch64::subo32, FI, MMO);
6219 return;
6220 }
6221 break;
6222 case 16:
6223 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6224 Opc = AArch64::LDRQui;
6225 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6226 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6227 Opc = AArch64::LD1Twov1d;
6228 Offset = false;
6229 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6231 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6232 AArch64::subo64, FI, MMO);
6233 return;
6234 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6235 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6236 "Unexpected register load without SVE load instructions");
6237 Opc = AArch64::LDR_ZXI;
6239 }
6240 break;
6241 case 24:
6242 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6243 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6244 Opc = AArch64::LD1Threev1d;
6245 Offset = false;
6246 }
6247 break;
6248 case 32:
6249 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6250 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6251 Opc = AArch64::LD1Fourv1d;
6252 Offset = false;
6253 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6254 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6255 Opc = AArch64::LD1Twov2d;
6256 Offset = false;
6257 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6258 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6259 "Unexpected register load without SVE load instructions");
6260 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6262 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6263 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6264 "Unexpected register load without SVE load instructions");
6265 Opc = AArch64::LDR_ZZXI;
6267 }
6268 break;
6269 case 48:
6270 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6271 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6272 Opc = AArch64::LD1Threev2d;
6273 Offset = false;
6274 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6275 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6276 "Unexpected register load without SVE load instructions");
6277 Opc = AArch64::LDR_ZZZXI;
6279 }
6280 break;
6281 case 64:
6282 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6283 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6284 Opc = AArch64::LD1Fourv2d;
6285 Offset = false;
6286 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6287 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6288 "Unexpected register load without SVE load instructions");
6289 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6291 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6292 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6293 "Unexpected register load without SVE load instructions");
6294 Opc = AArch64::LDR_ZZZZXI;
6296 }
6297 break;
6298 }
6299
6300 assert(Opc && "Unknown register class");
6301 MFI.setStackID(FI, StackID);
6302
6303 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
6304 .addReg(DestReg, getDefRegState(true))
6305 .addFrameIndex(FI);
6306 if (Offset)
6307 MI.addImm(0);
6308 if (PNRReg.isValid() && !PNRReg.isVirtual())
6309 MI.addDef(PNRReg, RegState::Implicit);
6310 MI.addMemOperand(MMO);
6311}
6312
6314 const MachineInstr &UseMI,
6315 const TargetRegisterInfo *TRI) {
6316 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6317 UseMI.getIterator()),
6318 [TRI](const MachineInstr &I) {
6319 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6320 I.readsRegister(AArch64::NZCV, TRI);
6321 });
6322}
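// For example, with
//   %2:gpr32 = SUBSWrr %0, %1, implicit-def $nzcv   ; DefMI
//   $w0 = COPY %2
//   Bcc 1, %bb.1, implicit $nzcv                    ; UseMI
// only the COPY lies between the two instructions, and since it neither reads
// nor clobbers NZCV the scan above returns false.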
6323
6324void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6325 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6326 // The smallest scalable element supported by scaled SVE addressing
6327 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6328 // byte offset must always be a multiple of 2.
6329 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6330
6331 // VGSized offsets are divided by '2', because the VG register is the
6332 // number of 64-bit granules as opposed to 128-bit vector chunks,
6333 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
6334 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6335 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
6336 ByteSized = Offset.getFixed();
6337 VGSized = Offset.getScalable() / 2;
6338}
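// As a worked example of the decomposition above: a StackOffset of 16 fixed
// bytes plus 32 scalable bytes (two full SVE data vectors) yields
// ByteSized = 16 and VGSized = 16, which later becomes a DWARF expression of
// roughly "<reg> + 16 + VG * 16".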
6339
6340/// Returns the offset in parts to which this frame offset can be
6341/// decomposed for the purpose of describing a frame offset.
6342/// For non-scalable offsets this is simply its byte size.
6343void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6344 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6345 int64_t &NumDataVectors) {
6346 // The smallest scalable element supported by scaled SVE addressing
6347 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
6348 // byte offset must always be a multiple of 2.
6349 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6350
6351 NumBytes = Offset.getFixed();
6352 NumDataVectors = 0;
6353 NumPredicateVectors = Offset.getScalable() / 2;
6354 // This method is used to get the offsets to adjust the frame offset.
6355 // If the function requires ADDPL to be used and needs more than two ADDPL
6356 // instructions, part of the offset is folded into NumDataVectors so that it
6357 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
6358 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6359 NumPredicateVectors > 62) {
6360 NumDataVectors = NumPredicateVectors / 8;
6361 NumPredicateVectors -= NumDataVectors * 8;
6362 }
6363}
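// For example, a scalable offset of 144 bytes gives NumPredicateVectors = 72;
// as that is a multiple of 8 it is folded entirely into NumDataVectors = 9, so
// a single ADDVL can materialize it instead of several ADDPLs. A scalable
// offset of 130 bytes (65 predicate-sized slots) similarly becomes
// NumDataVectors = 8 plus NumPredicateVectors = 1.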
6364
6365// Convenience function to create a DWARF expression for: Constant `Operation`.
6366// This helper emits compact sequences for common cases. For example, for `-15
6367// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6370 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6371 // -Constant (1 to 31)
6372 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6373 Operation = dwarf::DW_OP_minus;
6374 } else if (Constant >= 0 && Constant <= 31) {
6375 // Literal value 0 to 31
6376 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6377 } else {
6378 // Signed constant
6379 Expr.push_back(dwarf::DW_OP_consts);
6380 appendLEB128<LEB128Sign::Signed>(Expr, Constant);
6381 }
6382 return Expr.push_back(Operation);
6383}
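// E.g. appendConstantExpr(Expr, 16, dwarf::DW_OP_plus) emits the compact
// "DW_OP_lit16 DW_OP_plus", whereas a constant outside the literal range such
// as appendConstantExpr(Expr, 34, dwarf::DW_OP_mul) falls back to
// "DW_OP_consts 34 DW_OP_mul".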
6384
6385// Convenience function to create a DWARF expression for a register.
6386static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6387 Expr.push_back((char)dwarf::DW_OP_bregx);
6388 appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
6389 Expr.push_back(0);
6390}
6391
6392// Convenience function to create a DWARF expression for loading a register from
6393// a CFA offset.
6394static void appendLoadRegExpr(SmallVectorImpl<char> &Expr,
6395 int64_t OffsetFromDefCFA) {
6396 // This assumes the top of the DWARF stack contains the CFA.
6397 Expr.push_back(dwarf::DW_OP_dup);
6398 // Add the offset to the register.
6399 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6400 // Dereference the address (loads a 64-bit value).
6401 Expr.push_back(dwarf::DW_OP_deref);
6402}
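// For instance, with OffsetFromDefCFA == -8 this appends roughly
// "DW_OP_dup, DW_OP_lit8, DW_OP_minus, DW_OP_deref", i.e. it loads the 64-bit
// value saved 8 bytes below the CFA while leaving the CFA itself on the DWARF
// stack.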
6403
6404// Convenience function to create a comment for
6405// (+/-) NumBytes (* RegScale)?
6406static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6407 StringRef RegScale = {}) {
6408 if (NumBytes) {
6409 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6410 if (!RegScale.empty())
6411 Comment << ' ' << RegScale;
6412 }
6413}
6414
6415// Creates an MCCFIInstruction:
6416// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6417static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
6418 unsigned Reg,
6419 const StackOffset &Offset) {
6420 int64_t NumBytes, NumVGScaledBytes;
6421 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6422 NumVGScaledBytes);
6423 std::string CommentBuffer;
6424 llvm::raw_string_ostream Comment(CommentBuffer);
6425
6426 if (Reg == AArch64::SP)
6427 Comment << "sp";
6428 else if (Reg == AArch64::FP)
6429 Comment << "fp";
6430 else
6431 Comment << printReg(Reg, &TRI);
6432
6433 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6434 SmallString<64> Expr;
6435 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6436 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6437 // Reg + NumBytes
6438 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6439 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6440 appendOffsetComment(NumBytes, Comment);
6441 if (NumVGScaledBytes) {
6442 // + VG * NumVGScaledBytes
6443 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6444 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6445 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6446 Expr.push_back(dwarf::DW_OP_plus);
6447 }
6448
6449 // Wrap this into DW_CFA_def_cfa.
6450 SmallString<64> DefCfaExpr;
6451 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6452 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6453 DefCfaExpr.append(Expr.str());
6454 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6455 Comment.str());
6456}
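// As an illustration, for Reg == SP and a StackOffset of 16 fixed plus 32
// scalable bytes, the escape built above encodes roughly
//   DW_OP_breg31 +16, DW_OP_bregx VG +0, DW_OP_lit16, DW_OP_mul, DW_OP_plus
// and carries the comment "sp + 16 + 16 * VG" for the assembly listing.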
6457
6458MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
6459 unsigned FrameReg, unsigned Reg,
6460 const StackOffset &Offset,
6461 bool LastAdjustmentWasScalable) {
6462 if (Offset.getScalable())
6463 return createDefCFAExpression(TRI, Reg, Offset);
6464
6465 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6466 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6467
6468 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6469 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6470}
6471
6474 const StackOffset &OffsetFromDefCFA,
6475 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6476 int64_t NumBytes, NumVGScaledBytes;
6477 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6478 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6479
6480 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6481
6482 // Non-scalable offsets can use DW_CFA_offset directly.
6483 if (!NumVGScaledBytes)
6484 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6485
6486 std::string CommentBuffer;
6487 llvm::raw_string_ostream Comment(CommentBuffer);
6488 Comment << printReg(Reg, &TRI) << " @ cfa";
6489
6490 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6491 assert(NumVGScaledBytes && "Expected scalable offset");
6492 SmallString<64> OffsetExpr;
6493 // + VG * NumVGScaledBytes
6494 StringRef VGRegScale;
6495 if (IncomingVGOffsetFromDefCFA) {
6496 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6497 VGRegScale = "* IncomingVG";
6498 } else {
6499 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6500 VGRegScale = "* VG";
6501 }
6502 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6503 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6504 OffsetExpr.push_back(dwarf::DW_OP_plus);
6505 if (NumBytes) {
6506 // + NumBytes
6507 appendOffsetComment(NumBytes, Comment);
6508 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6509 }
6510
6511 // Wrap this into DW_CFA_expression
6512 SmallString<64> CfaExpr;
6513 CfaExpr.push_back(dwarf::DW_CFA_expression);
6514 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6515 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6516 CfaExpr.append(OffsetExpr.str());
6517
6518 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6519 Comment.str());
6520}
6521
6522// Helper function to emit a frame offset adjustment from a given
6523// pointer (SrcReg), stored into DestReg. This function is explicit
6524// in that it requires the opcode.
6525static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
6526 MachineBasicBlock::iterator MBBI,
6527 const DebugLoc &DL, unsigned DestReg,
6528 unsigned SrcReg, int64_t Offset, unsigned Opc,
6529 const TargetInstrInfo *TII,
6530 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6531 bool *HasWinCFI, bool EmitCFAOffset,
6532 StackOffset CFAOffset, unsigned FrameReg) {
6533 int Sign = 1;
6534 unsigned MaxEncoding, ShiftSize;
6535 switch (Opc) {
6536 case AArch64::ADDXri:
6537 case AArch64::ADDSXri:
6538 case AArch64::SUBXri:
6539 case AArch64::SUBSXri:
6540 MaxEncoding = 0xfff;
6541 ShiftSize = 12;
6542 break;
6543 case AArch64::ADDVL_XXI:
6544 case AArch64::ADDPL_XXI:
6545 case AArch64::ADDSVL_XXI:
6546 case AArch64::ADDSPL_XXI:
6547 MaxEncoding = 31;
6548 ShiftSize = 0;
6549 if (Offset < 0) {
6550 MaxEncoding = 32;
6551 Sign = -1;
6552 Offset = -Offset;
6553 }
6554 break;
6555 default:
6556 llvm_unreachable("Unsupported opcode");
6557 }
6558
6559 // `Offset` can be in bytes or in "scalable bytes".
6560 int VScale = 1;
6561 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6562 VScale = 16;
6563 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6564 VScale = 2;
6565
6566 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6567 // scratch register. If DestReg is a virtual register, use it as the
6568 // scratch register; otherwise, create a new virtual register (to be
6569 // replaced by the scavenger at the end of PEI). That case can be optimized
6570 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6571 // register can be loaded with offset%8 and the add/sub can use an extending
6572 // instruction with LSL#3.
6573 // Currently the function handles any offsets but generates a poor sequence
6574 // of code.
6575 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6576
6577 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6578 Register TmpReg = DestReg;
6579 if (TmpReg == AArch64::XZR)
6580 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6581 &AArch64::GPR64RegClass);
6582 do {
6583 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6584 unsigned LocalShiftSize = 0;
6585 if (ThisVal > MaxEncoding) {
6586 ThisVal = ThisVal >> ShiftSize;
6587 LocalShiftSize = ShiftSize;
6588 }
6589 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6590 "Encoding cannot handle value that big");
6591
6592 Offset -= ThisVal << LocalShiftSize;
6593 if (Offset == 0)
6594 TmpReg = DestReg;
6595 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6596 .addReg(SrcReg)
6597 .addImm(Sign * (int)ThisVal);
6598 if (ShiftSize)
6599 MBI = MBI.addImm(
6600 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
6601 MBI = MBI.setMIFlag(Flag);
6602
6603 auto Change =
6604 VScale == 1
6605 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6606 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6607 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6608 CFAOffset += Change;
6609 else
6610 CFAOffset -= Change;
6611 if (EmitCFAOffset && DestReg == TmpReg) {
6612 MachineFunction &MF = *MBB.getParent();
6613 const TargetSubtargetInfo &STI = MF.getSubtarget();
6614 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6615
6616 unsigned CFIIndex = MF.addFrameInst(
6617 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6618 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6619 .addCFIIndex(CFIIndex)
6620 .setMIFlags(Flag);
6621 }
6622
6623 if (NeedsWinCFI) {
6624 int Imm = (int)(ThisVal << LocalShiftSize);
6625 if (VScale != 1 && DestReg == AArch64::SP) {
6626 if (HasWinCFI)
6627 *HasWinCFI = true;
6628 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6629 .addImm(ThisVal)
6630 .setMIFlag(Flag);
6631 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6632 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6633 assert(VScale == 1 && "Expected non-scalable operation");
6634 if (HasWinCFI)
6635 *HasWinCFI = true;
6636 if (Imm == 0)
6637 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6638 else
6639 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6640 .addImm(Imm)
6641 .setMIFlag(Flag);
6642 assert(Offset == 0 && "Expected remaining offset to be zero to "
6643 "emit a single SEH directive");
6644 } else if (DestReg == AArch64::SP) {
6645 assert(VScale == 1 && "Expected non-scalable operation");
6646 if (HasWinCFI)
6647 *HasWinCFI = true;
6648 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6649 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6650 .addImm(Imm)
6651 .setMIFlag(Flag);
6652 }
6653 }
6654
6655 SrcReg = TmpReg;
6656 } while (Offset);
6657}
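// As a rough example, a call with Opc == AArch64::SUBXri and Offset == 0x1235
// cannot encode the value in one instruction (the immediate field is 12 bits
// with an optional LSL #12), so the loop above emits
//   sub dst, src, #1, lsl #12   // consumes 0x1000
//   sub dst, dst, #0x235        // consumes the remainder
// with CFI/SEH directives interleaved when requested.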
6658
6659void llvm::emitFrameOffset(MachineBasicBlock &MBB,
6660 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
6661 unsigned DestReg, unsigned SrcReg,
6662 StackOffset Offset, const TargetInstrInfo *TII,
6663 MachineInstr::MIFlag Flag, bool SetNZCV,
6664 bool NeedsWinCFI, bool *HasWinCFI,
6665 bool EmitCFAOffset, StackOffset CFAOffset,
6666 unsigned FrameReg) {
6667 // If a function is marked as arm_locally_streaming, then the runtime value of
6668 // vscale in the prologue/epilogue is different from the runtime value of vscale
6669 // in the function's body. To avoid having to consider multiple vscales,
6670 // we can use `addsvl` to allocate any scalable stack-slots, which under
6671 // most circumstances will be only locals, not callee-save slots.
6672 const Function &F = MBB.getParent()->getFunction();
6673 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6674
6675 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6676 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6677 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6678
6679 // Insert ADDSXri for scalable offset at the end.
6680 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6681 if (NeedsFinalDefNZCV)
6682 SetNZCV = false;
6683
6684 // First emit non-scalable frame offsets, or a simple 'mov'.
6685 if (Bytes || (!Offset && SrcReg != DestReg)) {
6686 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6687 "SP increment/decrement not 8-byte aligned");
6688 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6689 if (Bytes < 0) {
6690 Bytes = -Bytes;
6691 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6692 }
6693 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6694 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6695 FrameReg);
6696 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6697 ? StackOffset::getFixed(-Bytes)
6698 : StackOffset::getFixed(Bytes);
6699 SrcReg = DestReg;
6700 FrameReg = DestReg;
6701 }
6702
6703 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6704 "WinCFI can't allocate fractions of an SVE data vector");
6705
6706 if (NumDataVectors) {
6707 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6708 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6709 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6710 FrameReg);
6711 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6712 SrcReg = DestReg;
6713 }
6714
6715 if (NumPredicateVectors) {
6716 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6717 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6718 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6719 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6720 FrameReg);
6721 }
6722
6723 if (NeedsFinalDefNZCV)
6724 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6725 .addReg(DestReg)
6726 .addImm(0)
6727 .addImm(0);
6728}
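// Putting the pieces together: adjusting SP by 16 fixed bytes plus two SVE
// data vectors (NumBytes = 16, NumDataVectors = 2, NumPredicateVectors = 0)
// is normally emitted as
//   add sp, sp, #16
//   addvl sp, sp, #2
// with ADDSVL/ADDSPL used instead inside "aarch64_pstate_sm_body" functions.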
6729
6732 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6733 LiveIntervals *LIS, VirtRegMap *VRM) const {
6734 // This is a bit of a hack. Consider this instruction:
6735 //
6736 // %0 = COPY %sp; GPR64all:%0
6737 //
6738 // We explicitly chose GPR64all for the virtual register so such a copy might
6739 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6740 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6741 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6742 //
6743 // To prevent that, we are going to constrain the %0 register class here.
6744 if (MI.isFullCopy()) {
6745 Register DstReg = MI.getOperand(0).getReg();
6746 Register SrcReg = MI.getOperand(1).getReg();
6747 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6748 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6749 return nullptr;
6750 }
6751 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6752 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6753 return nullptr;
6754 }
6755 // Nothing can be folded with a copy from/to NZCV.
6756 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6757 return nullptr;
6758 }
6759
6760 // Handle the case where a copy is being spilled or filled but the source
6761 // and destination register class don't match. For example:
6762 //
6763 // %0 = COPY %xzr; GPR64common:%0
6764 //
6765 // In this case we can still safely fold away the COPY and generate the
6766 // following spill code:
6767 //
6768 // STRXui %xzr, %stack.0
6769 //
6770 // This also eliminates spilled cross register class COPYs (e.g. between x and
6771 // d regs) of the same size. For example:
6772 //
6773 // %0 = COPY %1; GPR64:%0, FPR64:%1
6774 //
6775 // will be filled as
6776 //
6777 // LDRDui %0, fi<#0>
6778 //
6779 // instead of
6780 //
6781 // LDRXui %Temp, fi<#0>
6782 // %0 = FMOV %Temp
6783 //
6784 if (MI.isCopy() && Ops.size() == 1 &&
6785 // Make sure we're only folding the explicit COPY defs/uses.
6786 (Ops[0] == 0 || Ops[0] == 1)) {
6787 bool IsSpill = Ops[0] == 0;
6788 bool IsFill = !IsSpill;
6789 const TargetRegisterInfo &TRI = getRegisterInfo();
6790 const MachineRegisterInfo &MRI = MF.getRegInfo();
6791 MachineBasicBlock &MBB = *MI.getParent();
6792 const MachineOperand &DstMO = MI.getOperand(0);
6793 const MachineOperand &SrcMO = MI.getOperand(1);
6794 Register DstReg = DstMO.getReg();
6795 Register SrcReg = SrcMO.getReg();
6796 // This is slightly expensive to compute for physical regs since
6797 // getMinimalPhysRegClass is slow.
6798 auto getRegClass = [&](unsigned Reg) {
6799 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6800 : TRI.getMinimalPhysRegClass(Reg);
6801 };
6802
6803 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6804 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6805 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6806 "Mismatched register size in non subreg COPY");
6807 if (IsSpill)
6808 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6809 getRegClass(SrcReg), Register());
6810 else
6811 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6812 getRegClass(DstReg), Register());
6813 return &*--InsertPt;
6814 }
6815
6816 // Handle cases like spilling def of:
6817 //
6818 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6819 //
6820 // where the physical register source can be widened and stored to the full
6821 // virtual reg destination stack slot, in this case producing:
6822 //
6823 // STRXui %xzr, %stack.0
6824 //
6825 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6826 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6827 assert(SrcMO.getSubReg() == 0 &&
6828 "Unexpected subreg on physical register");
6829 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6830 FrameIndex, &AArch64::GPR64RegClass, Register());
6831 return &*--InsertPt;
6832 }
6833
6834 // Handle cases like filling use of:
6835 //
6836 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6837 //
6838 // where we can load the full virtual reg source stack slot, into the subreg
6839 // destination, in this case producing:
6840 //
6841 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6842 //
6843 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6844 const TargetRegisterClass *FillRC = nullptr;
6845 switch (DstMO.getSubReg()) {
6846 default:
6847 break;
6848 case AArch64::sub_32:
6849 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6850 FillRC = &AArch64::GPR32RegClass;
6851 break;
6852 case AArch64::ssub:
6853 FillRC = &AArch64::FPR32RegClass;
6854 break;
6855 case AArch64::dsub:
6856 FillRC = &AArch64::FPR64RegClass;
6857 break;
6858 }
6859
6860 if (FillRC) {
6861 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6862 TRI.getRegSizeInBits(*FillRC) &&
6863 "Mismatched regclass size on folded subreg COPY");
6864 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
6865 Register());
6866 MachineInstr &LoadMI = *--InsertPt;
6867 MachineOperand &LoadDst = LoadMI.getOperand(0);
6868 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6869 LoadDst.setSubReg(DstMO.getSubReg());
6870 LoadDst.setIsUndef();
6871 return &LoadMI;
6872 }
6873 }
6874 }
6875
6876 // Cannot fold.
6877 return nullptr;
6878}
6879
6881 StackOffset &SOffset,
6882 bool *OutUseUnscaledOp,
6883 unsigned *OutUnscaledOp,
6884 int64_t *EmittableOffset) {
6885 // Set output values in case of early exit.
6886 if (EmittableOffset)
6887 *EmittableOffset = 0;
6888 if (OutUseUnscaledOp)
6889 *OutUseUnscaledOp = false;
6890 if (OutUnscaledOp)
6891 *OutUnscaledOp = 0;
6892
6893 // Exit early for structured vector spills/fills as they can't take an
6894 // immediate offset.
6895 switch (MI.getOpcode()) {
6896 default:
6897 break;
6898 case AArch64::LD1Rv1d:
6899 case AArch64::LD1Rv2s:
6900 case AArch64::LD1Rv2d:
6901 case AArch64::LD1Rv4h:
6902 case AArch64::LD1Rv4s:
6903 case AArch64::LD1Rv8b:
6904 case AArch64::LD1Rv8h:
6905 case AArch64::LD1Rv16b:
6906 case AArch64::LD1Twov2d:
6907 case AArch64::LD1Threev2d:
6908 case AArch64::LD1Fourv2d:
6909 case AArch64::LD1Twov1d:
6910 case AArch64::LD1Threev1d:
6911 case AArch64::LD1Fourv1d:
6912 case AArch64::ST1Twov2d:
6913 case AArch64::ST1Threev2d:
6914 case AArch64::ST1Fourv2d:
6915 case AArch64::ST1Twov1d:
6916 case AArch64::ST1Threev1d:
6917 case AArch64::ST1Fourv1d:
6918 case AArch64::ST1i8:
6919 case AArch64::ST1i16:
6920 case AArch64::ST1i32:
6921 case AArch64::ST1i64:
6922 case AArch64::IRG:
6923 case AArch64::IRGstack:
6924 case AArch64::STGloop:
6925 case AArch64::STZGloop:
6926 return AArch64FrameOffsetCannotUpdate;
6927 }
6928
6929 // Get the min/max offset and the scale.
6930 TypeSize ScaleValue(0U, false), Width(0U, false);
6931 int64_t MinOff, MaxOff;
6932 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6933 MaxOff))
6934 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6935
6936 // Construct the complete offset.
6937 bool IsMulVL = ScaleValue.isScalable();
6938 unsigned Scale = ScaleValue.getKnownMinValue();
6939 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6940
6941 const MachineOperand &ImmOpnd =
6942 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6943 Offset += ImmOpnd.getImm() * Scale;
6944
6945 // If the offset doesn't match the scale, we rewrite the instruction to
6946 // use the unscaled instruction instead. Likewise, if we have a negative
6947 // offset and there is an unscaled op to use.
6948 std::optional<unsigned> UnscaledOp =
6949 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
6950 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6951 if (useUnscaledOp &&
6952 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6953 MaxOff))
6954 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6955
6956 Scale = ScaleValue.getKnownMinValue();
6957 assert(IsMulVL == ScaleValue.isScalable() &&
6958 "Unscaled opcode has different value for scalable");
6959
6960 int64_t Remainder = Offset % Scale;
6961 assert(!(Remainder && useUnscaledOp) &&
6962 "Cannot have remainder when using unscaled op");
6963
6964 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6965 int64_t NewOffset = Offset / Scale;
6966 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6967 Offset = Remainder;
6968 else {
6969 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6970 Offset = Offset - (NewOffset * Scale);
6971 }
6972
6973 if (EmittableOffset)
6974 *EmittableOffset = NewOffset;
6975 if (OutUseUnscaledOp)
6976 *OutUseUnscaledOp = useUnscaledOp;
6977 if (OutUnscaledOp && UnscaledOp)
6978 *OutUnscaledOp = *UnscaledOp;
6979
6980 if (IsMulVL)
6981 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6982 else
6983 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6984 return AArch64FrameOffsetCanUpdate |
6985 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6986}
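// For instance, for an STRXui (scale 8, unsigned 12-bit immediate) whose
// combined offset comes to 17 bytes: 17 is not a multiple of the scale, so the
// caller is told to switch to the unscaled STURXi with an emittable offset of
// 17; if no unscaled form existed, the emittable offset would be 2 (16 bytes)
// with the 1-byte remainder left in SOffset for the caller to materialize.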
6987
6989 unsigned FrameReg, StackOffset &Offset,
6990 const AArch64InstrInfo *TII) {
6991 unsigned Opcode = MI.getOpcode();
6992 unsigned ImmIdx = FrameRegIdx + 1;
6993
6994 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6995 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6996 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6997 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6998 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6999 MI.eraseFromParent();
7000 Offset = StackOffset();
7001 return true;
7002 }
7003
7004 int64_t NewOffset;
7005 unsigned UnscaledOp;
7006 bool UseUnscaledOp;
7007 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
7008 &UnscaledOp, &NewOffset);
7009 if ((Status & AArch64FrameOffsetCanUpdate) &&
7010 (Status & AArch64FrameOffsetIsLegal)) {
7011 // Replace the FrameIndex with FrameReg.
7012 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
7013 if (UseUnscaledOp)
7014 MI.setDesc(TII->get(UnscaledOp));
7015
7016 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
7017 return !Offset;
7018 }
7019
7020 return false;
7021}
7022
7028
7029MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); }
7030
7031// AArch64 supports MachineCombiner.
7032bool AArch64InstrInfo::useMachineCombiner() const { return true; }
7033
7034// True when Opc sets flag
7035static bool isCombineInstrSettingFlag(unsigned Opc) {
7036 switch (Opc) {
7037 case AArch64::ADDSWrr:
7038 case AArch64::ADDSWri:
7039 case AArch64::ADDSXrr:
7040 case AArch64::ADDSXri:
7041 case AArch64::SUBSWrr:
7042 case AArch64::SUBSXrr:
7043 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7044 case AArch64::SUBSWri:
7045 case AArch64::SUBSXri:
7046 return true;
7047 default:
7048 break;
7049 }
7050 return false;
7051}
7052
7053// 32b Opcodes that can be combined with a MUL
7054static bool isCombineInstrCandidate32(unsigned Opc) {
7055 switch (Opc) {
7056 case AArch64::ADDWrr:
7057 case AArch64::ADDWri:
7058 case AArch64::SUBWrr:
7059 case AArch64::ADDSWrr:
7060 case AArch64::ADDSWri:
7061 case AArch64::SUBSWrr:
7062 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7063 case AArch64::SUBWri:
7064 case AArch64::SUBSWri:
7065 return true;
7066 default:
7067 break;
7068 }
7069 return false;
7070}
7071
7072// 64b Opcodes that can be combined with a MUL
7073static bool isCombineInstrCandidate64(unsigned Opc) {
7074 switch (Opc) {
7075 case AArch64::ADDXrr:
7076 case AArch64::ADDXri:
7077 case AArch64::SUBXrr:
7078 case AArch64::ADDSXrr:
7079 case AArch64::ADDSXri:
7080 case AArch64::SUBSXrr:
7081 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7082 case AArch64::SUBXri:
7083 case AArch64::SUBSXri:
7084 case AArch64::ADDv8i8:
7085 case AArch64::ADDv16i8:
7086 case AArch64::ADDv4i16:
7087 case AArch64::ADDv8i16:
7088 case AArch64::ADDv2i32:
7089 case AArch64::ADDv4i32:
7090 case AArch64::SUBv8i8:
7091 case AArch64::SUBv16i8:
7092 case AArch64::SUBv4i16:
7093 case AArch64::SUBv8i16:
7094 case AArch64::SUBv2i32:
7095 case AArch64::SUBv4i32:
7096 return true;
7097 default:
7098 break;
7099 }
7100 return false;
7101}
7102
7103// FP Opcodes that can be combined with a FMUL.
7104static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
7105 switch (Inst.getOpcode()) {
7106 default:
7107 break;
7108 case AArch64::FADDHrr:
7109 case AArch64::FADDSrr:
7110 case AArch64::FADDDrr:
7111 case AArch64::FADDv4f16:
7112 case AArch64::FADDv8f16:
7113 case AArch64::FADDv2f32:
7114 case AArch64::FADDv2f64:
7115 case AArch64::FADDv4f32:
7116 case AArch64::FSUBHrr:
7117 case AArch64::FSUBSrr:
7118 case AArch64::FSUBDrr:
7119 case AArch64::FSUBv4f16:
7120 case AArch64::FSUBv8f16:
7121 case AArch64::FSUBv2f32:
7122 case AArch64::FSUBv2f64:
7123 case AArch64::FSUBv4f32:
7124 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
7125 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7126 // the target options or if FADD/FSUB has the contract fast-math flag.
7127 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7128 Inst.getFlag(MachineInstr::FmContract);
7129 }
7130 return false;
7131}
7132
7133// Opcodes that can be combined with a MUL
7134static bool isCombineInstrCandidate(unsigned Opc) {
7135 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
7136}
7137
7138//
7139// Utility routine that checks if \param MO is defined by an
7140// \param CombineOpc instruction in the basic block \param MBB
7141static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
7142 unsigned CombineOpc, unsigned ZeroReg = 0,
7143 bool CheckZeroReg = false) {
7144 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7145 MachineInstr *MI = nullptr;
7146
7147 if (MO.isReg() && MO.getReg().isVirtual())
7148 MI = MRI.getUniqueVRegDef(MO.getReg());
7149 // And it needs to be in the trace (otherwise, it won't have a depth).
7150 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7151 return false;
7152 // Must only be used by the user we combine with.
7153 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7154 return false;
7155
7156 if (CheckZeroReg) {
7157 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7158 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7159 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
7160 // The third input reg must be zero.
7161 if (MI->getOperand(3).getReg() != ZeroReg)
7162 return false;
7163 }
7164
7165 if (isCombineInstrSettingFlag(CombineOpc) &&
7166 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7167 return false;
7168
7169 return true;
7170}
7171
7172//
7173// Is \param MO defined by an integer multiply and can be combined?
7174static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7175 unsigned MulOpc, unsigned ZeroReg) {
7176 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7177}
7178
7179//
7180// Is \param MO defined by a floating-point multiply and can be combined?
7181static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7182 unsigned MulOpc) {
7183 return canCombine(MBB, MO, MulOpc);
7184}
7185
7186// TODO: There are many more machine instruction opcodes to match:
7187// 1. Other data types (integer, vectors)
7188// 2. Other math / logic operations (xor, or)
7189// 3. Other forms of the same operation (intrinsics and other variants)
7190bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7191 bool Invert) const {
7192 if (Invert)
7193 return false;
7194 switch (Inst.getOpcode()) {
7195 // == Floating-point types ==
7196 // -- Floating-point instructions --
7197 case AArch64::FADDHrr:
7198 case AArch64::FADDSrr:
7199 case AArch64::FADDDrr:
7200 case AArch64::FMULHrr:
7201 case AArch64::FMULSrr:
7202 case AArch64::FMULDrr:
7203 case AArch64::FMULX16:
7204 case AArch64::FMULX32:
7205 case AArch64::FMULX64:
7206 // -- Advanced SIMD instructions --
7207 case AArch64::FADDv4f16:
7208 case AArch64::FADDv8f16:
7209 case AArch64::FADDv2f32:
7210 case AArch64::FADDv4f32:
7211 case AArch64::FADDv2f64:
7212 case AArch64::FMULv4f16:
7213 case AArch64::FMULv8f16:
7214 case AArch64::FMULv2f32:
7215 case AArch64::FMULv4f32:
7216 case AArch64::FMULv2f64:
7217 case AArch64::FMULXv4f16:
7218 case AArch64::FMULXv8f16:
7219 case AArch64::FMULXv2f32:
7220 case AArch64::FMULXv4f32:
7221 case AArch64::FMULXv2f64:
7222 // -- SVE instructions --
7223 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7224 // in the SVE instruction set (though there are predicated ones).
7225 case AArch64::FADD_ZZZ_H:
7226 case AArch64::FADD_ZZZ_S:
7227 case AArch64::FADD_ZZZ_D:
7228 case AArch64::FMUL_ZZZ_H:
7229 case AArch64::FMUL_ZZZ_S:
7230 case AArch64::FMUL_ZZZ_D:
7231 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
7232 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
7233
7234 // == Integer types ==
7235 // -- Base instructions --
7236 // Opcodes MULWrr and MULXrr don't exist because
7237 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7238 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7239 // The machine-combiner does not support three-source-operand machine
7240 // instructions, so we cannot reassociate MULs.
7241 case AArch64::ADDWrr:
7242 case AArch64::ADDXrr:
7243 case AArch64::ANDWrr:
7244 case AArch64::ANDXrr:
7245 case AArch64::ORRWrr:
7246 case AArch64::ORRXrr:
7247 case AArch64::EORWrr:
7248 case AArch64::EORXrr:
7249 case AArch64::EONWrr:
7250 case AArch64::EONXrr:
7251 // -- Advanced SIMD instructions --
7252 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7253 // in the Advanced SIMD instruction set.
7254 case AArch64::ADDv8i8:
7255 case AArch64::ADDv16i8:
7256 case AArch64::ADDv4i16:
7257 case AArch64::ADDv8i16:
7258 case AArch64::ADDv2i32:
7259 case AArch64::ADDv4i32:
7260 case AArch64::ADDv1i64:
7261 case AArch64::ADDv2i64:
7262 case AArch64::MULv8i8:
7263 case AArch64::MULv16i8:
7264 case AArch64::MULv4i16:
7265 case AArch64::MULv8i16:
7266 case AArch64::MULv2i32:
7267 case AArch64::MULv4i32:
7268 case AArch64::ANDv8i8:
7269 case AArch64::ANDv16i8:
7270 case AArch64::ORRv8i8:
7271 case AArch64::ORRv16i8:
7272 case AArch64::EORv8i8:
7273 case AArch64::EORv16i8:
7274 // -- SVE instructions --
7275 case AArch64::ADD_ZZZ_B:
7276 case AArch64::ADD_ZZZ_H:
7277 case AArch64::ADD_ZZZ_S:
7278 case AArch64::ADD_ZZZ_D:
7279 case AArch64::MUL_ZZZ_B:
7280 case AArch64::MUL_ZZZ_H:
7281 case AArch64::MUL_ZZZ_S:
7282 case AArch64::MUL_ZZZ_D:
7283 case AArch64::AND_ZZZ:
7284 case AArch64::ORR_ZZZ:
7285 case AArch64::EOR_ZZZ:
7286 return true;
7287
7288 default:
7289 return false;
7290 }
7291}
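// Illustrative effect of reporting an opcode as associative and commutative
// (register names are arbitrary): a serial chain such as
//   add w8, w0, w1
//   add w9, w8, w2
//   add w10, w9, w3
// can be rebalanced by the generic reassociation machinery into
//   add w8, w0, w1
//   add w9, w2, w3
//   add w10, w8, w9
// shortening the critical path from three dependent adds to two.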
7292
7293/// Find instructions that can be turned into madd.
7294static bool getMaddPatterns(MachineInstr &Root,
7295 SmallVectorImpl<unsigned> &Patterns) {
7296 unsigned Opc = Root.getOpcode();
7297 MachineBasicBlock &MBB = *Root.getParent();
7298 bool Found = false;
7299
7300 if (!isCombineInstrCandidate(Opc))
7301 return false;
7302 if (isCombineInstrSettingFlag(Opc)) {
7303 int Cmp_NZCV =
7304 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7305 // When NZCV is live bail out.
7306 if (Cmp_NZCV == -1)
7307 return false;
7308 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7309 // When opcode can't change bail out.
7310 // CHECKME: do we miss any cases for opcode conversion?
7311 if (NewOpc == Opc)
7312 return false;
7313 Opc = NewOpc;
7314 }
7315
7316 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7317 unsigned Pattern) {
7318 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7319 Patterns.push_back(Pattern);
7320 Found = true;
7321 }
7322 };
7323
7324 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7325 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7326 Patterns.push_back(Pattern);
7327 Found = true;
7328 }
7329 };
7330
7331 typedef AArch64MachineCombinerPattern MCP;
7332
7333 switch (Opc) {
7334 default:
7335 break;
7336 case AArch64::ADDWrr:
7337 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7338 "ADDWrr does not have register operands");
7339 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7340 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7341 break;
7342 case AArch64::ADDXrr:
7343 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7344 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7345 break;
7346 case AArch64::SUBWrr:
7347 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7348 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7349 break;
7350 case AArch64::SUBXrr:
7351 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7352 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7353 break;
7354 case AArch64::ADDWri:
7355 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7356 break;
7357 case AArch64::ADDXri:
7358 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7359 break;
7360 case AArch64::SUBWri:
7361 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7362 break;
7363 case AArch64::SUBXri:
7364 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7365 break;
7366 case AArch64::ADDv8i8:
7367 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7368 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7369 break;
7370 case AArch64::ADDv16i8:
7371 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7372 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7373 break;
7374 case AArch64::ADDv4i16:
7375 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7376 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7377 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7378 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7379 break;
7380 case AArch64::ADDv8i16:
7381 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7382 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7383 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7384 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7385 break;
7386 case AArch64::ADDv2i32:
7387 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7388 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7389 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7390 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7391 break;
7392 case AArch64::ADDv4i32:
7393 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7394 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7395 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7396 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7397 break;
7398 case AArch64::SUBv8i8:
7399 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7400 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7401 break;
7402 case AArch64::SUBv16i8:
7403 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7404 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7405 break;
7406 case AArch64::SUBv4i16:
7407 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7408 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7409 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7410 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7411 break;
7412 case AArch64::SUBv8i16:
7413 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7414 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7415 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7416 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7417 break;
7418 case AArch64::SUBv2i32:
7419 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7420 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7421 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7422 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7423 break;
7424 case AArch64::SUBv4i32:
7425 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7426 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7427 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7428 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7429 break;
7430 }
7431 return Found;
7432}
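// Sketch of what MULADDW_OP1 is meant to catch (illustrative registers):
//   mul  w8, w0, w1        ; really MADD w8, w0, w1, wzr
//   add  w9, w8, w2
// which the combiner can then rewrite as
//   madd w9, w0, w1, w2
// provided the intermediate multiply result has no other non-debug use.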
7433
7434bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7435 switch (Opcode) {
7436 default:
7437 break;
7438 case AArch64::UABALB_ZZZ_D:
7439 case AArch64::UABALB_ZZZ_H:
7440 case AArch64::UABALB_ZZZ_S:
7441 case AArch64::UABALT_ZZZ_D:
7442 case AArch64::UABALT_ZZZ_H:
7443 case AArch64::UABALT_ZZZ_S:
7444 case AArch64::SABALB_ZZZ_D:
7445 case AArch64::SABALB_ZZZ_S:
7446 case AArch64::SABALB_ZZZ_H:
7447 case AArch64::SABALT_ZZZ_D:
7448 case AArch64::SABALT_ZZZ_S:
7449 case AArch64::SABALT_ZZZ_H:
7450 case AArch64::UABALv16i8_v8i16:
7451 case AArch64::UABALv2i32_v2i64:
7452 case AArch64::UABALv4i16_v4i32:
7453 case AArch64::UABALv4i32_v2i64:
7454 case AArch64::UABALv8i16_v4i32:
7455 case AArch64::UABALv8i8_v8i16:
7456 case AArch64::UABAv16i8:
7457 case AArch64::UABAv2i32:
7458 case AArch64::UABAv4i16:
7459 case AArch64::UABAv4i32:
7460 case AArch64::UABAv8i16:
7461 case AArch64::UABAv8i8:
7462 case AArch64::SABALv16i8_v8i16:
7463 case AArch64::SABALv2i32_v2i64:
7464 case AArch64::SABALv4i16_v4i32:
7465 case AArch64::SABALv4i32_v2i64:
7466 case AArch64::SABALv8i16_v4i32:
7467 case AArch64::SABALv8i8_v8i16:
7468 case AArch64::SABAv16i8:
7469 case AArch64::SABAv2i32:
7470 case AArch64::SABAv4i16:
7471 case AArch64::SABAv4i32:
7472 case AArch64::SABAv8i16:
7473 case AArch64::SABAv8i8:
7474 return true;
7475 }
7476
7477 return false;
7478}
7479
7480unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7481 unsigned AccumulationOpcode) const {
7482 switch (AccumulationOpcode) {
7483 default:
7484 llvm_unreachable("Unsupported accumulation Opcode!");
7485 case AArch64::UABALB_ZZZ_D:
7486 return AArch64::UABDLB_ZZZ_D;
7487 case AArch64::UABALB_ZZZ_H:
7488 return AArch64::UABDLB_ZZZ_H;
7489 case AArch64::UABALB_ZZZ_S:
7490 return AArch64::UABDLB_ZZZ_S;
7491 case AArch64::UABALT_ZZZ_D:
7492 return AArch64::UABDLT_ZZZ_D;
7493 case AArch64::UABALT_ZZZ_H:
7494 return AArch64::UABDLT_ZZZ_H;
7495 case AArch64::UABALT_ZZZ_S:
7496 return AArch64::UABDLT_ZZZ_S;
7497 case AArch64::UABALv16i8_v8i16:
7498 return AArch64::UABDLv16i8_v8i16;
7499 case AArch64::UABALv2i32_v2i64:
7500 return AArch64::UABDLv2i32_v2i64;
7501 case AArch64::UABALv4i16_v4i32:
7502 return AArch64::UABDLv4i16_v4i32;
7503 case AArch64::UABALv4i32_v2i64:
7504 return AArch64::UABDLv4i32_v2i64;
7505 case AArch64::UABALv8i16_v4i32:
7506 return AArch64::UABDLv8i16_v4i32;
7507 case AArch64::UABALv8i8_v8i16:
7508 return AArch64::UABDLv8i8_v8i16;
7509 case AArch64::UABAv16i8:
7510 return AArch64::UABDv16i8;
7511 case AArch64::UABAv2i32:
7512 return AArch64::UABDv2i32;
7513 case AArch64::UABAv4i16:
7514 return AArch64::UABDv4i16;
7515 case AArch64::UABAv4i32:
7516 return AArch64::UABDv4i32;
7517 case AArch64::UABAv8i16:
7518 return AArch64::UABDv8i16;
7519 case AArch64::UABAv8i8:
7520 return AArch64::UABDv8i8;
7521 case AArch64::SABALB_ZZZ_D:
7522 return AArch64::SABDLB_ZZZ_D;
7523 case AArch64::SABALB_ZZZ_S:
7524 return AArch64::SABDLB_ZZZ_S;
7525 case AArch64::SABALB_ZZZ_H:
7526 return AArch64::SABDLB_ZZZ_H;
7527 case AArch64::SABALT_ZZZ_D:
7528 return AArch64::SABDLT_ZZZ_D;
7529 case AArch64::SABALT_ZZZ_S:
7530 return AArch64::SABDLT_ZZZ_S;
7531 case AArch64::SABALT_ZZZ_H:
7532 return AArch64::SABDLT_ZZZ_H;
7533 case AArch64::SABALv16i8_v8i16:
7534 return AArch64::SABDLv16i8_v8i16;
7535 case AArch64::SABALv2i32_v2i64:
7536 return AArch64::SABDLv2i32_v2i64;
7537 case AArch64::SABALv4i16_v4i32:
7538 return AArch64::SABDLv4i16_v4i32;
7539 case AArch64::SABALv4i32_v2i64:
7540 return AArch64::SABDLv4i32_v2i64;
7541 case AArch64::SABALv8i16_v4i32:
7542 return AArch64::SABDLv8i16_v4i32;
7543 case AArch64::SABALv8i8_v8i16:
7544 return AArch64::SABDLv8i8_v8i16;
7545 case AArch64::SABAv16i8:
7546 return AArch64::SABDv16i8;
7547 case AArch64::SABAv2i32:
7548 return AArch64::SABAv2i32;
7549 case AArch64::SABAv4i16:
7550 return AArch64::SABDv4i16;
7551 case AArch64::SABAv4i32:
7552 return AArch64::SABDv4i32;
7553 case AArch64::SABAv8i16:
7554 return AArch64::SABDv8i16;
7555 case AArch64::SABAv8i8:
7556 return AArch64::SABDv8i8;
7557 }
7558}
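// These two hooks feed the generic accumulator-chain reassociation: a long
// serial chain such as (illustrative)
//   uaba v0.4s, v1.4s, v2.4s
//   uaba v0.4s, v3.4s, v4.4s
//   uaba v0.4s, v5.4s, v6.4s
// can be split into independent sub-chains, each started with the
// corresponding non-accumulating opcode returned above (here UABDv4i32), and
// the partial sums are recombined with the ADD opcode supplied by
// getReduceOpcodeForAccumulator() later in this file.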
7559
7560/// Floating-Point Support
7561
7562/// Find instructions that can be turned into madd.
7563static bool getFMAPatterns(MachineInstr &Root,
7564 SmallVectorImpl<unsigned> &Patterns) {
7565
7566 if (!isCombineInstrCandidateFP(Root))
7567 return false;
7568
7569 MachineBasicBlock &MBB = *Root.getParent();
7570 bool Found = false;
7571
7572 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7573 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7574 Patterns.push_back(Pattern);
7575 return true;
7576 }
7577 return false;
7578 };
7579
7580 typedef AArch64MachineCombinerPattern MCP;
7581
7582 switch (Root.getOpcode()) {
7583 default:
7584 assert(false && "Unsupported FP instruction in combiner\n");
7585 break;
7586 case AArch64::FADDHrr:
7587 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7588 "FADDHrr does not have register operands");
7589
7590 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7591 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7592 break;
7593 case AArch64::FADDSrr:
7594 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7595 "FADDSrr does not have register operands");
7596
7597 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7598 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7599
7600 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7601 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7602 break;
7603 case AArch64::FADDDrr:
7604 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7605 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7606
7607 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7608 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7609 break;
7610 case AArch64::FADDv4f16:
7611 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7612 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7613
7614 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7615 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7616 break;
7617 case AArch64::FADDv8f16:
7618 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7619 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7620
7621 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7622 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7623 break;
7624 case AArch64::FADDv2f32:
7625 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7626 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7627
7628 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7629 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7630 break;
7631 case AArch64::FADDv2f64:
7632 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7633 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7634
7635 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7636 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7637 break;
7638 case AArch64::FADDv4f32:
7639 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7640 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7641
7642 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7643 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7644 break;
7645 case AArch64::FSUBHrr:
7646 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7647 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7648 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7649 break;
7650 case AArch64::FSUBSrr:
7651 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7652
7653 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7654 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7655
7656 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7657 break;
7658 case AArch64::FSUBDrr:
7659 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7660
7661 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7662 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7663
7664 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7665 break;
7666 case AArch64::FSUBv4f16:
7667 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7668 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7669
7670 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7671 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7672 break;
7673 case AArch64::FSUBv8f16:
7674 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7675 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7676
7677 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7678 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7679 break;
7680 case AArch64::FSUBv2f32:
7681 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7682 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7683
7684 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7685 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7686 break;
7687 case AArch64::FSUBv2f64:
7688 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7689 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7690
7691 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7692 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7693 break;
7694 case AArch64::FSUBv4f32:
7695 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7696 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7697
7698 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7699 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7700 break;
7701 }
7702 return Found;
7703}
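// Typical shape matched above (illustrative registers; the fusion conditions
// checked by isCombineInstrCandidateFP must hold):
//   fmul s1, s2, s3
//   fadd s0, s1, s4        ; FMULADDS_OP1
// becomes
//   fmadd s0, s2, s3, s4
// and the vector FADD/FSUB + FMUL variants likewise become FMLA/FMLS.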
7704
7705static bool getFMULPatterns(MachineInstr &Root,
7706 SmallVectorImpl<unsigned> &Patterns) {
7707 MachineBasicBlock &MBB = *Root.getParent();
7708 bool Found = false;
7709
7710 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7711 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7712 MachineOperand &MO = Root.getOperand(Operand);
7713 MachineInstr *MI = nullptr;
7714 if (MO.isReg() && MO.getReg().isVirtual())
7715 MI = MRI.getUniqueVRegDef(MO.getReg());
7716 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7717 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7718 MI->getOperand(1).getReg().isVirtual())
7719 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7720 if (MI && MI->getOpcode() == Opcode) {
7721 Patterns.push_back(Pattern);
7722 return true;
7723 }
7724 return false;
7725 };
7726
7727 typedef AArch64MachineCombinerPattern MCP;
7728
7729 switch (Root.getOpcode()) {
7730 default:
7731 return false;
7732 case AArch64::FMULv2f32:
7733 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7734 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7735 break;
7736 case AArch64::FMULv2f64:
7737 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7738 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7739 break;
7740 case AArch64::FMULv4f16:
7741 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7742 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7743 break;
7744 case AArch64::FMULv4f32:
7745 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7746 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7747 break;
7748 case AArch64::FMULv8f16:
7749 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7750 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7751 break;
7752 }
7753
7754 return Found;
7755}
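// These FMUL patterns feed the "Fold (FMUL x (DUP y lane))" rewrite later in
// this file, e.g. (illustrative registers):
//   dup  v1.4s, v2.s[0]
//   fmul v0.4s, v3.4s, v1.4s
// becomes
//   fmul v0.4s, v3.4s, v2.s[0]
// so the DUP can later be removed if it becomes dead.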
7756
7757static bool getFNEGPatterns(MachineInstr &Root,
7758 SmallVectorImpl<unsigned> &Patterns) {
7759 unsigned Opc = Root.getOpcode();
7760 MachineBasicBlock &MBB = *Root.getParent();
7761 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7762
7763 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7764 MachineOperand &MO = Root.getOperand(1);
7765 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7766 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7767 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7771 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7772 Patterns.push_back(Pattern);
7773 return true;
7774 }
7775 return false;
7776 };
7777
7778 switch (Opc) {
7779 default:
7780 break;
7781 case AArch64::FNEGDr:
7782 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7783 case AArch64::FNEGSr:
7784 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7785 }
7786
7787 return false;
7788}
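// Shape matched here (illustrative registers; the fast-math flags checked in
// the lambda above must be present on the FMADD):
//   fmadd d1, d2, d3, d4
//   fneg  d0, d1
// becomes
//   fnmadd d0, d2, d3, d4    ; computes -(d2 * d3 + d4)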
7789
7790/// Return true when a code sequence can improve throughput. It
7791/// should be called only for instructions in loops.
7792/// \param Pattern - combiner pattern
7793bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7794 switch (Pattern) {
7795 default:
7796 break;
7902 return true;
7903 } // end switch (Pattern)
7904 return false;
7905}
7906
7907/// Find other MI combine patterns.
7908static bool getMiscPatterns(MachineInstr &Root,
7909 SmallVectorImpl<unsigned> &Patterns) {
7910 // A - (B + C) ==> (A - B) - C or (A - C) - B
7911 unsigned Opc = Root.getOpcode();
7912 MachineBasicBlock &MBB = *Root.getParent();
7913
7914 switch (Opc) {
7915 case AArch64::SUBWrr:
7916 case AArch64::SUBSWrr:
7917 case AArch64::SUBXrr:
7918 case AArch64::SUBSXrr:
7919 // Found candidate root.
7920 break;
7921 default:
7922 return false;
7923 }
7924
7925 if (isCombineInstrSettingFlag(Opc) &&
7926 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7927 -1)
7928 return false;
7929
7930 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7931 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7932 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7933 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7934 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7935 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7936 return true;
7937 }
7938
7939 return false;
7940}
7941
7942/// Check if the given instruction forms a gather load pattern that can be
7943/// optimized for better Memory-Level Parallelism (MLP). This function
7944/// identifies chains of NEON lane load instructions that load data from
7945/// different memory addresses into individual lanes of a 128-bit vector
7946/// register, then attempts to split the pattern into parallel loads to break
7947/// the serial dependency between instructions.
7948///
7949/// Pattern Matched:
7950/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7951/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7952///
7953/// Transformed Into:
7954/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7955/// to combine the results, enabling better memory-level parallelism.
7956///
7957/// Supported Element Types:
7958/// - 32-bit elements (LD1i32, 4 lanes total)
7959/// - 16-bit elements (LD1i16, 8 lanes total)
7960/// - 8-bit elements (LD1i8, 16 lanes total)
7961static bool getGatherLanePattern(MachineInstr &Root,
7962 SmallVectorImpl<unsigned> &Patterns,
7963 unsigned LoadLaneOpCode, unsigned NumLanes) {
7964 const MachineFunction *MF = Root.getMF();
7965
7966 // Early exit if optimizing for size.
7967 if (MF->getFunction().hasMinSize())
7968 return false;
7969
7970 const MachineRegisterInfo &MRI = MF->getRegInfo();
7971 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7972
7973 // The root of the pattern must load into the last lane of the vector.
7974 if (Root.getOperand(2).getImm() != NumLanes - 1)
7975 return false;
7976
7977 // Check that we have loads into all lanes except lane 0.
7978 // For each load we also want to check that:
7979 // 1. It has a single non-debug use (since we will be replacing the virtual
7980 // register)
7981 // 2. That the addressing mode only uses a single pointer operand
7982 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7983 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7984 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7985 SmallVector<const MachineInstr *, 16> LoadInstrs;
7986 while (!RemainingLanes.empty() && CurrInstr &&
7987 CurrInstr->getOpcode() == LoadLaneOpCode &&
7988 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7989 CurrInstr->getNumOperands() == 4) {
7990 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7991 LoadInstrs.push_back(CurrInstr);
7992 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7993 }
7994
7995 // Check that we have found a match for lanes N-1.. 1.
7996 if (!RemainingLanes.empty())
7997 return false;
7998
7999 // Match the SUBREG_TO_REG sequence.
8000 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
8001 return false;
8002
8003 // Verify that the subreg to reg loads an integer into the first lane.
8004 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
8005 unsigned SingleLaneSizeInBits = 128 / NumLanes;
8006 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
8007 return false;
8008
8009 // Verify that it also has a single non-debug use.
8010 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
8011 return false;
8012
8013 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
8014
8015 // If there is any chance of aliasing, do not apply the pattern.
8016 // Walk backward through the MBB starting from Root.
8017 // Exit early if we've encountered all load instructions or hit the search
8018 // limit.
8019 auto MBBItr = Root.getIterator();
8020 unsigned RemainingSteps = GatherOptSearchLimit;
8021 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
8022 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
8023 const MachineBasicBlock *MBB = Root.getParent();
8024
8025 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
8026 !RemainingLoadInstrs.empty();
8027 --MBBItr, --RemainingSteps) {
8028 const MachineInstr &CurrInstr = *MBBItr;
8029
8030 // Remove this instruction from remaining loads if it's one we're tracking.
8031 RemainingLoadInstrs.erase(&CurrInstr);
8032
8033 // Check for potential aliasing with any of the load instructions to
8034 // optimize.
8035 if (CurrInstr.isLoadFoldBarrier())
8036 return false;
8037 }
8038
8039 // If we hit the search limit without finding all load instructions,
8040 // don't match the pattern.
8041 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
8042 return false;
8043
8044 switch (NumLanes) {
8045 case 4:
8046 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
8047 break;
8048 case 8:
8049 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
8050 break;
8051 case 16:
8052 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
8053 break;
8054 default:
8055 llvm_unreachable("Got bad number of lanes for gather pattern.");
8056 }
8057
8058 return true;
8059}
8060
8061/// Search for patterns of LD instructions we can optimize.
8062static bool getLoadPatterns(MachineInstr &Root,
8063 SmallVectorImpl<unsigned> &Patterns) {
8064
8065 // The pattern searches for loads into single lanes.
8066 switch (Root.getOpcode()) {
8067 case AArch64::LD1i32:
8068 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
8069 case AArch64::LD1i16:
8070 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
8071 case AArch64::LD1i8:
8072 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
8073 default:
8074 return false;
8075 }
8076}
8077
8078/// Generate optimized instruction sequence for gather load patterns to improve
8079/// Memory-Level Parallelism (MLP). This function transforms a chain of
8080/// sequential NEON lane loads into parallel vector loads that can execute
8081/// concurrently.
8082static void
8083generateGatherLanePattern(MachineInstr &Root,
8084 SmallVectorImpl<MachineInstr *> &InsInstrs,
8085 SmallVectorImpl<MachineInstr *> &DelInstrs,
8086 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8087 unsigned Pattern, unsigned NumLanes) {
8088 MachineFunction &MF = *Root.getParent()->getParent();
8089 MachineRegisterInfo &MRI = MF.getRegInfo();
8090 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8091
8092 // Gather the initial load instructions to build the pattern.
8093 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
8094 MachineInstr *CurrInstr = &Root;
8095 for (unsigned i = 0; i < NumLanes - 1; ++i) {
8096 LoadToLaneInstrs.push_back(CurrInstr);
8097 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8098 }
8099
8100 // Sort the load instructions according to the lane.
8101 llvm::sort(LoadToLaneInstrs,
8102 [](const MachineInstr *A, const MachineInstr *B) {
8103 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
8104 });
8105
8106 MachineInstr *SubregToReg = CurrInstr;
8107 LoadToLaneInstrs.push_back(
8108 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
8109 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
8110
8111 const TargetRegisterClass *FPR128RegClass =
8112 MRI.getRegClass(Root.getOperand(0).getReg());
8113
8114 // Helper lambda to create a LD1 instruction.
8115 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
8116 Register SrcRegister, unsigned Lane,
8117 Register OffsetRegister,
8118 bool OffsetRegisterKillState) {
8119 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8120 MachineInstrBuilder LoadIndexIntoRegister =
8121 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8122 NewRegister)
8123 .addReg(SrcRegister)
8124 .addImm(Lane)
8125 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
8126 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8127 InsInstrs.push_back(LoadIndexIntoRegister);
8128 return NewRegister;
8129 };
8130
8131 // Helper to create load instruction based on the NumLanes in the NEON
8132 // register we are rewriting.
8133 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
8134 Register OffsetReg,
8135 bool KillState) -> MachineInstrBuilder {
8136 unsigned Opcode;
8137 switch (NumLanes) {
8138 case 4:
8139 Opcode = AArch64::LDRSui;
8140 break;
8141 case 8:
8142 Opcode = AArch64::LDRHui;
8143 break;
8144 case 16:
8145 Opcode = AArch64::LDRBui;
8146 break;
8147 default:
8149 "Got unsupported number of lanes in machine-combiner gather pattern");
8150 }
8151 // Immediate offset load
8152 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8153 .addReg(OffsetReg)
8154 .addImm(0);
8155 };
8156
8157 // Load the remaining lanes into register 0.
8158 auto LanesToLoadToReg0 =
8159 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8160 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8161 Register PrevReg = SubregToReg->getOperand(0).getReg();
8162 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8163 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8164 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8165 OffsetRegOperand.getReg(),
8166 OffsetRegOperand.isKill());
8167 DelInstrs.push_back(LoadInstr);
8168 }
8169 Register LastLoadReg0 = PrevReg;
8170
8171 // First load into register 1. Perform an integer load to zero out the upper
8172 // lanes in a single instruction.
8173 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
8174 MachineInstr *OriginalSplitLoad =
8175 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8176 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8177 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8178
8179 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8180 OriginalSplitLoad->getOperand(3);
8181 MachineInstrBuilder MiddleIndexLoadInstr =
8182 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8183 OriginalSplitToLoadOffsetOperand.getReg(),
8184 OriginalSplitToLoadOffsetOperand.isKill());
8185
8186 InstrIdxForVirtReg.insert(
8187 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8188 InsInstrs.push_back(MiddleIndexLoadInstr);
8189 DelInstrs.push_back(OriginalSplitLoad);
8190
8191 // Subreg To Reg instruction for register 1.
8192 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8193 unsigned SubregType;
8194 switch (NumLanes) {
8195 case 4:
8196 SubregType = AArch64::ssub;
8197 break;
8198 case 8:
8199 SubregType = AArch64::hsub;
8200 break;
8201 case 16:
8202 SubregType = AArch64::bsub;
8203 break;
8204 default:
8206 "Got invalid NumLanes for machine-combiner gather pattern");
8207 }
8208
8209 auto SubRegToRegInstr =
8210 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8211 DestRegForSubregToReg)
8212 .addImm(0)
8213 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8214 .addImm(SubregType);
8215 InstrIdxForVirtReg.insert(
8216 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8217 InsInstrs.push_back(SubRegToRegInstr);
8218
8219 // Load remaining lanes into register 1.
8220 auto LanesToLoadToReg1 =
8221 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8222 LoadToLaneInstrsAscending.end());
8223 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8224 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8225 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8226 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8227 OffsetRegOperand.getReg(),
8228 OffsetRegOperand.isKill());
8229
8230 // Do not add the last reg to DelInstrs - it will be removed later.
8231 if (Index == NumLanes / 2 - 2) {
8232 break;
8233 }
8234 DelInstrs.push_back(LoadInstr);
8235 }
8236 Register LastLoadReg1 = PrevReg;
8237
8238 // Create the final zip instruction to combine the results.
8239 MachineInstrBuilder ZipInstr =
8240 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8241 Root.getOperand(0).getReg())
8242 .addReg(LastLoadReg0)
8243 .addReg(LastLoadReg1);
8244 InsInstrs.push_back(ZipInstr);
8245}
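// Rough before/after shape of this rewrite for the 4 x 32-bit case
// (illustrative registers and addresses):
//   Before:  ldr s0, [x0]             ; lane 0 via SUBREG_TO_REG
//            ld1 { v0.s }[1], [x1]
//            ld1 { v0.s }[2], [x2]
//            ld1 { v0.s }[3], [x3]
//   After:   ldr s0, [x0]
//            ld1 { v0.s }[1], [x1]    ; chain 0: lanes 0-1
//            ldr s1, [x2]
//            ld1 { v1.s }[1], [x3]    ; chain 1: lanes 2-3
//            zip1 v0.2d, v0.2d, v1.2d
// The two shorter chains can issue independently, which is the memory-level
// parallelism win described in the pattern documentation above.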
8246
8260
8261/// Return true when there is potentially a faster code sequence for an
8262/// instruction chain ending in \p Root. All potential patterns are listed in
8263/// the \p Pattern vector. Pattern should be sorted in priority order since the
8264/// pattern evaluator stops checking as soon as it finds a faster sequence.
8265
8266bool AArch64InstrInfo::getMachineCombinerPatterns(
8267 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8268 bool DoRegPressureReduce) const {
8269 // Integer patterns
8270 if (getMaddPatterns(Root, Patterns))
8271 return true;
8272 // Floating point patterns
8273 if (getFMULPatterns(Root, Patterns))
8274 return true;
8275 if (getFMAPatterns(Root, Patterns))
8276 return true;
8277 if (getFNEGPatterns(Root, Patterns))
8278 return true;
8279
8280 // Other patterns
8281 if (getMiscPatterns(Root, Patterns))
8282 return true;
8283
8284 // Load patterns
8285 if (getLoadPatterns(Root, Patterns))
8286 return true;
8287
8288 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8289 DoRegPressureReduce);
8290}
8291
8292enum class FMAInstKind { Default, Indexed, Accumulator };
8293/// genFusedMultiply - Generate fused multiply instructions.
8294/// This function supports both integer and floating point instructions.
8295/// A typical example:
8296/// F|MUL I=A,B,0
8297/// F|ADD R,I,C
8298/// ==> F|MADD R,A,B,C
8299/// \param MF Containing MachineFunction
8300/// \param MRI Register information
8301/// \param TII Target information
8302/// \param Root is the F|ADD instruction
8303/// \param [out] InsInstrs is a vector of machine instructions and will
8304/// contain the generated madd instruction
8305/// \param IdxMulOpd is index of operand in Root that is the result of
8306/// the F|MUL. In the example above IdxMulOpd is 1.
8307/// \param MaddOpc the opcode of the f|madd instruction
8308/// \param RC Register class of operands
8309/// \param kind of fma instruction (addressing mode) to be generated
8310/// \param ReplacedAddend is the result register from the instruction
8311/// replacing the non-combined operand, if any.
8312static MachineInstr *
8313genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
8314 const TargetInstrInfo *TII, MachineInstr &Root,
8315 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8316 unsigned MaddOpc, const TargetRegisterClass *RC,
8317 FMAInstKind kind = FMAInstKind::Default,
8318 const Register *ReplacedAddend = nullptr) {
8319 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8320
8321 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8322 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8323 Register ResultReg = Root.getOperand(0).getReg();
8324 Register SrcReg0 = MUL->getOperand(1).getReg();
8325 bool Src0IsKill = MUL->getOperand(1).isKill();
8326 Register SrcReg1 = MUL->getOperand(2).getReg();
8327 bool Src1IsKill = MUL->getOperand(2).isKill();
8328
8329 Register SrcReg2;
8330 bool Src2IsKill;
8331 if (ReplacedAddend) {
8332 // If we just generated a new addend, we must be its only use.
8333 SrcReg2 = *ReplacedAddend;
8334 Src2IsKill = true;
8335 } else {
8336 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8337 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8338 }
8339
8340 if (ResultReg.isVirtual())
8341 MRI.constrainRegClass(ResultReg, RC);
8342 if (SrcReg0.isVirtual())
8343 MRI.constrainRegClass(SrcReg0, RC);
8344 if (SrcReg1.isVirtual())
8345 MRI.constrainRegClass(SrcReg1, RC);
8346 if (SrcReg2.isVirtual())
8347 MRI.constrainRegClass(SrcReg2, RC);
8348
8349 MachineInstrBuilder MIB;
8350 if (kind == FMAInstKind::Default)
8351 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8352 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8353 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8354 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8355 else if (kind == FMAInstKind::Indexed)
8356 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8357 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8358 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8359 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8360 .addImm(MUL->getOperand(3).getImm());
8361 else if (kind == FMAInstKind::Accumulator)
8362 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8363 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8364 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8365 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8366 else
8367 assert(false && "Invalid FMA instruction kind \n");
8368 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
8369 InsInstrs.push_back(MIB);
8370 return MUL;
8371}
8372
8373static MachineInstr *
8374genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
8375 const TargetInstrInfo *TII, MachineInstr &Root,
8376 SmallVectorImpl<MachineInstr *> &InsInstrs) {
8377 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8378
8379 unsigned Opc = 0;
8380 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8381 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8382 Opc = AArch64::FNMADDSrrr;
8383 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8384 Opc = AArch64::FNMADDDrrr;
8385 else
8386 return nullptr;
8387
8388 Register ResultReg = Root.getOperand(0).getReg();
8389 Register SrcReg0 = MAD->getOperand(1).getReg();
8390 Register SrcReg1 = MAD->getOperand(2).getReg();
8391 Register SrcReg2 = MAD->getOperand(3).getReg();
8392 bool Src0IsKill = MAD->getOperand(1).isKill();
8393 bool Src1IsKill = MAD->getOperand(2).isKill();
8394 bool Src2IsKill = MAD->getOperand(3).isKill();
8395 if (ResultReg.isVirtual())
8396 MRI.constrainRegClass(ResultReg, RC);
8397 if (SrcReg0.isVirtual())
8398 MRI.constrainRegClass(SrcReg0, RC);
8399 if (SrcReg1.isVirtual())
8400 MRI.constrainRegClass(SrcReg1, RC);
8401 if (SrcReg2.isVirtual())
8402 MRI.constrainRegClass(SrcReg2, RC);
8403
8404 MachineInstrBuilder MIB =
8405 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8406 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8407 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8408 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8409 InsInstrs.push_back(MIB);
8410
8411 return MAD;
8412}
8413
8414/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8415static MachineInstr *
8416genIndexedMultiply(MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8417 unsigned IdxDupOp, unsigned MulOpc,
8418 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8420 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8421 "Invalid index of FMUL operand");
8422
8423 MachineFunction &MF = *Root.getMF();
8424 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8425
8426 MachineInstr *Dup =
8427 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8428
8429 if (Dup->getOpcode() == TargetOpcode::COPY)
8430 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8431
8432 Register DupSrcReg = Dup->getOperand(1).getReg();
8433 MRI.clearKillFlags(DupSrcReg);
8434 MRI.constrainRegClass(DupSrcReg, RC);
8435
8436 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8437
8438 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8439 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8440
8441 Register ResultReg = Root.getOperand(0).getReg();
8442
8443 MachineInstrBuilder MIB;
8444 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8445 .add(MulOp)
8446 .addReg(DupSrcReg)
8447 .addImm(DupSrcLane);
8448
8449 InsInstrs.push_back(MIB);
8450 return &Root;
8451}
8452
8453/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8454/// instructions.
8455///
8456/// \see genFusedMultiply
8457static MachineInstr *genFusedMultiplyAcc(
8458 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8459 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8460 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8461 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8463}
8464
8465/// genNeg - Helper to generate an intermediate negation of the second operand
8466/// of Root
8467static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8468 const TargetInstrInfo *TII, MachineInstr &Root,
8469 SmallVectorImpl<MachineInstr *> &InsInstrs,
8470 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8471 unsigned MnegOpc, const TargetRegisterClass *RC) {
8472 Register NewVR = MRI.createVirtualRegister(RC);
8473 MachineInstrBuilder MIB =
8474 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8475 .add(Root.getOperand(2));
8476 InsInstrs.push_back(MIB);
8477
8478 assert(InstrIdxForVirtReg.empty());
8479 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8480
8481 return NewVR;
8482}
8483
8484/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8485/// instructions with an additional negation of the accumulator
8486static MachineInstr *genFusedMultiplyAccNeg(
8487 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8488 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8489 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8490 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8491 assert(IdxMulOpd == 1);
8492
8493 Register NewVR =
8494 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8495 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8496 FMAInstKind::Accumulator, &NewVR);
8497}
8498
8499/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8500/// instructions.
8501///
8502/// \see genFusedMultiply
8503static MachineInstr *genFusedMultiplyIdx(
8504 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8505 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8506 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8507 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8509}
8510
8511/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
8512/// instructions (indexed form) with an additional negation of the accumulator
8513static MachineInstr *genFusedMultiplyIdxNeg(
8514 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8515 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8516 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8517 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8518 assert(IdxMulOpd == 1);
8519
8520 Register NewVR =
8521 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8522
8523 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8524 FMAInstKind::Indexed, &NewVR);
8525}
8526
8527/// genMaddR - Generate madd instruction and combine mul and add using
8528/// an extra virtual register
8529/// Example - an ADD intermediate needs to be stored in a register:
8530/// MUL I=A,B,0
8531/// ADD R,I,Imm
8532/// ==> ORR V, ZR, Imm
8533/// ==> MADD R,A,B,V
8534/// \param MF Containing MachineFunction
8535/// \param MRI Register information
8536/// \param TII Target information
8537/// \param Root is the ADD instruction
8538/// \param [out] InsInstrs is a vector of machine instructions and will
8539/// contain the generated madd instruction
8540/// \param IdxMulOpd is index of operand in Root that is the result of
8541/// the MUL. In the example above IdxMulOpd is 1.
8542/// \param MaddOpc the opcode of the madd instruction
8543/// \param VR is a virtual register that holds the value of an ADD operand
8544/// (V in the example above).
8545/// \param RC Register class of operands
8546static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8547 const TargetInstrInfo *TII, MachineInstr &Root,
8548 SmallVectorImpl<MachineInstr *> &InsInstrs,
8549 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8550 const TargetRegisterClass *RC) {
8551 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8552
8553 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8554 Register ResultReg = Root.getOperand(0).getReg();
8555 Register SrcReg0 = MUL->getOperand(1).getReg();
8556 bool Src0IsKill = MUL->getOperand(1).isKill();
8557 Register SrcReg1 = MUL->getOperand(2).getReg();
8558 bool Src1IsKill = MUL->getOperand(2).isKill();
8559
8560 if (ResultReg.isVirtual())
8561 MRI.constrainRegClass(ResultReg, RC);
8562 if (SrcReg0.isVirtual())
8563 MRI.constrainRegClass(SrcReg0, RC);
8564 if (SrcReg1.isVirtual())
8565 MRI.constrainRegClass(SrcReg1, RC);
8566 if (Register(VR).isVirtual())
8567 MRI.constrainRegClass(VR, RC);
8568
8569 MachineInstrBuilder MIB =
8570 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8571 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8572 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8573 .addReg(VR);
8574 // Insert the MADD
8575 InsInstrs.push_back(MIB);
8576 return MUL;
8577}
8578
8579/// Do the following transformation
8580/// A - (B + C) ==> (A - B) - C
8581/// A - (B + C) ==> (A - C) - B
8582static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8583 const TargetInstrInfo *TII, MachineInstr &Root,
8584 SmallVectorImpl<MachineInstr *> &InsInstrs,
8585 SmallVectorImpl<MachineInstr *> &DelInstrs,
8586 unsigned IdxOpd1,
8587 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8588 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8589 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8590 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8591
8592 Register ResultReg = Root.getOperand(0).getReg();
8593 Register RegA = Root.getOperand(1).getReg();
8594 bool RegAIsKill = Root.getOperand(1).isKill();
8595 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8596 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8597 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8598 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8599 Register NewVR =
8600 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8601
8602 unsigned Opcode = Root.getOpcode();
8603 if (Opcode == AArch64::SUBSWrr)
8604 Opcode = AArch64::SUBWrr;
8605 else if (Opcode == AArch64::SUBSXrr)
8606 Opcode = AArch64::SUBXrr;
8607 else
8608 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8609 "Unexpected instruction opcode.");
8610
8611 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8612 Flags &= ~MachineInstr::NoSWrap;
8613 Flags &= ~MachineInstr::NoUWrap;
8614
8615 MachineInstrBuilder MIB1 =
8616 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8617 .addReg(RegA, getKillRegState(RegAIsKill))
8618 .addReg(RegB, getKillRegState(RegBIsKill))
8619 .setMIFlags(Flags);
8620 MachineInstrBuilder MIB2 =
8621 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8622 .addReg(NewVR, getKillRegState(true))
8623 .addReg(RegC, getKillRegState(RegCIsKill))
8624 .setMIFlags(Flags);
8625
8626 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8627 InsInstrs.push_back(MIB1);
8628 InsInstrs.push_back(MIB2);
8629 DelInstrs.push_back(AddMI);
8630 DelInstrs.push_back(&Root);
8631}
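// Illustrative rewrite for IdxOpd1 == 1 (arbitrary registers):
//   add w8, w1, w2
//   sub w0, w3, w8           ; A - (B + C)
// becomes
//   sub w9, w3, w1           ; A - B
//   sub w0, w9, w2           ; (A - B) - C
// The nsw/nuw flags are dropped above because the intermediate difference can
// wrap even when the original expression does not.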
8632
8633unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8634 unsigned int AccumulatorOpCode) const {
8635 switch (AccumulatorOpCode) {
8636 case AArch64::UABALB_ZZZ_D:
8637 case AArch64::SABALB_ZZZ_D:
8638 case AArch64::UABALT_ZZZ_D:
8639 case AArch64::SABALT_ZZZ_D:
8640 return AArch64::ADD_ZZZ_D;
8641 case AArch64::UABALB_ZZZ_H:
8642 case AArch64::SABALB_ZZZ_H:
8643 case AArch64::UABALT_ZZZ_H:
8644 case AArch64::SABALT_ZZZ_H:
8645 return AArch64::ADD_ZZZ_H;
8646 case AArch64::UABALB_ZZZ_S:
8647 case AArch64::SABALB_ZZZ_S:
8648 case AArch64::UABALT_ZZZ_S:
8649 case AArch64::SABALT_ZZZ_S:
8650 return AArch64::ADD_ZZZ_S;
8651 case AArch64::UABALv16i8_v8i16:
8652 case AArch64::SABALv8i8_v8i16:
8653 case AArch64::SABAv8i16:
8654 case AArch64::UABAv8i16:
8655 return AArch64::ADDv8i16;
8656 case AArch64::SABALv2i32_v2i64:
8657 case AArch64::UABALv2i32_v2i64:
8658 case AArch64::SABALv4i32_v2i64:
8659 return AArch64::ADDv2i64;
8660 case AArch64::UABALv4i16_v4i32:
8661 case AArch64::SABALv4i16_v4i32:
8662 case AArch64::SABALv8i16_v4i32:
8663 case AArch64::SABAv4i32:
8664 case AArch64::UABAv4i32:
8665 return AArch64::ADDv4i32;
8666 case AArch64::UABALv4i32_v2i64:
8667 return AArch64::ADDv2i64;
8668 case AArch64::UABALv8i16_v4i32:
8669 return AArch64::ADDv4i32;
8670 case AArch64::UABALv8i8_v8i16:
8671 case AArch64::SABALv16i8_v8i16:
8672 return AArch64::ADDv8i16;
8673 case AArch64::UABAv16i8:
8674 case AArch64::SABAv16i8:
8675 return AArch64::ADDv16i8;
8676 case AArch64::UABAv4i16:
8677 case AArch64::SABAv4i16:
8678 return AArch64::ADDv4i16;
8679 case AArch64::UABAv2i32:
8680 case AArch64::SABAv2i32:
8681 return AArch64::ADDv2i32;
8682 case AArch64::UABAv8i8:
8683 case AArch64::SABAv8i8:
8684 return AArch64::ADDv8i8;
8685 default:
8686 llvm_unreachable("Unknown accumulator opcode");
8687 }
8688}
8689
8690/// When getMachineCombinerPatterns() finds potential patterns,
8691/// this function generates the instructions that could replace the
8692/// original code sequence
8693void AArch64InstrInfo::genAlternativeCodeSequence(
8694 MachineInstr &Root, unsigned Pattern,
8695 SmallVectorImpl<MachineInstr *> &InsInstrs,
8696 SmallVectorImpl<MachineInstr *> &DelInstrs,
8697 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8698 MachineBasicBlock &MBB = *Root.getParent();
8699 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8700 MachineFunction &MF = *MBB.getParent();
8701 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8702
8703 MachineInstr *MUL = nullptr;
8704 const TargetRegisterClass *RC;
8705 unsigned Opc;
8706 switch (Pattern) {
8707 default:
8708 // Reassociate instructions.
8709 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8710 DelInstrs, InstrIdxForVirtReg);
8711 return;
8712 case AArch64MachineCombinerPattern::SUBADD_OP1:
8713 // A - (B + C)
8714 // ==> (A - B) - C
8715 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8716 InstrIdxForVirtReg);
8717 return;
8718 case AArch64MachineCombinerPattern::SUBADD_OP2:
8719 // A - (B + C)
8720 // ==> (A - C) - B
8721 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8722 InstrIdxForVirtReg);
8723 return;
8724 case AArch64MachineCombinerPattern::MULADDW_OP1:
8725 case AArch64MachineCombinerPattern::MULADDX_OP1:
8726 // MUL I=A,B,0
8727 // ADD R,I,C
8728 // ==> MADD R,A,B,C
8729 // --- Create(MADD);
8730 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
8731 Opc = AArch64::MADDWrrr;
8732 RC = &AArch64::GPR32RegClass;
8733 } else {
8734 Opc = AArch64::MADDXrrr;
8735 RC = &AArch64::GPR64RegClass;
8736 }
8737 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8738 break;
8739 case AArch64MachineCombinerPattern::MULADDW_OP2:
8740 case AArch64MachineCombinerPattern::MULADDX_OP2:
8741 // MUL I=A,B,0
8742 // ADD R,C,I
8743 // ==> MADD R,A,B,C
8744 // --- Create(MADD);
8745 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
8746 Opc = AArch64::MADDWrrr;
8747 RC = &AArch64::GPR32RegClass;
8748 } else {
8749 Opc = AArch64::MADDXrrr;
8750 RC = &AArch64::GPR64RegClass;
8751 }
8752 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8753 break;
8754 case AArch64MachineCombinerPattern::MULADDWI_OP1:
8755 case AArch64MachineCombinerPattern::MULADDXI_OP1:
8756 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
8757 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
8758 // MUL I=A,B,0
8759 // ADD/SUB R,I,Imm
8760 // ==> MOV V, Imm/-Imm
8761 // ==> MADD R,A,B,V
8762 // --- Create(MADD);
8763 const TargetRegisterClass *RC;
8764 unsigned BitSize, MovImm;
8765 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
8766 Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
8767 MovImm = AArch64::MOVi32imm;
8768 RC = &AArch64::GPR32spRegClass;
8769 BitSize = 32;
8770 Opc = AArch64::MADDWrrr;
8771 RC = &AArch64::GPR32RegClass;
8772 } else {
8773 MovImm = AArch64::MOVi64imm;
8774 RC = &AArch64::GPR64spRegClass;
8775 BitSize = 64;
8776 Opc = AArch64::MADDXrrr;
8777 RC = &AArch64::GPR64RegClass;
8778 }
8779 Register NewVR = MRI.createVirtualRegister(RC);
8780 uint64_t Imm = Root.getOperand(2).getImm();
8781
8782 if (Root.getOperand(3).isImm()) {
8783 unsigned Val = Root.getOperand(3).getImm();
8784 Imm = Imm << Val;
8785 }
8786 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8787 Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
8788 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8789 // Check that the immediate can be composed via a single instruction.
8790 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8791 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8792 if (Insn.size() != 1)
8793 return;
8794 MachineInstrBuilder MIB1 =
8795 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8796 .addImm(IsSub ? -Imm : Imm);
8797 InsInstrs.push_back(MIB1);
8798 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8799 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8800 break;
8801 }
8802 case AArch64MachineCombinerPattern::MULSUBW_OP1:
8803 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
8804 // MUL I=A,B,0
8805 // SUB R,I, C
8806 // ==> SUB V, 0, C
8807 // ==> MADD R,A,B,V // = -C + A*B
8808 // --- Create(MADD);
8809 const TargetRegisterClass *SubRC;
8810 unsigned SubOpc, ZeroReg;
8811 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
8812 SubOpc = AArch64::SUBWrr;
8813 SubRC = &AArch64::GPR32spRegClass;
8814 ZeroReg = AArch64::WZR;
8815 Opc = AArch64::MADDWrrr;
8816 RC = &AArch64::GPR32RegClass;
8817 } else {
8818 SubOpc = AArch64::SUBXrr;
8819 SubRC = &AArch64::GPR64spRegClass;
8820 ZeroReg = AArch64::XZR;
8821 Opc = AArch64::MADDXrrr;
8822 RC = &AArch64::GPR64RegClass;
8823 }
8824 Register NewVR = MRI.createVirtualRegister(SubRC);
8825 // SUB NewVR, 0, C
8826 MachineInstrBuilder MIB1 =
8827 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8828 .addReg(ZeroReg)
8829 .add(Root.getOperand(2));
8830 InsInstrs.push_back(MIB1);
8831 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8832 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8833 break;
8834 }
8835 case AArch64MachineCombinerPattern::MULSUBW_OP2:
8836 case AArch64MachineCombinerPattern::MULSUBX_OP2:
8837 // MUL I=A,B,0
8838 // SUB R,C,I
8839 // ==> MSUB R,A,B,C (computes C - A*B)
8840 // --- Create(MSUB);
8841 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
8842 Opc = AArch64::MSUBWrrr;
8843 RC = &AArch64::GPR32RegClass;
8844 } else {
8845 Opc = AArch64::MSUBXrrr;
8846 RC = &AArch64::GPR64RegClass;
8847 }
8848 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8849 break;
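// For illustration (register numbers are arbitrary), the W-form rewrite
// looks like
//   mul w8, w0, w1
//   sub w9, w2, w8
// ==>
//   msub w9, w0, w1, w2 // = w2 - w0*w1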
8851 Opc = AArch64::MLAv8i8;
8852 RC = &AArch64::FPR64RegClass;
8853 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8854 break;
8856 Opc = AArch64::MLAv8i8;
8857 RC = &AArch64::FPR64RegClass;
8858 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8859 break;
8861 Opc = AArch64::MLAv16i8;
8862 RC = &AArch64::FPR128RegClass;
8863 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8864 break;
8866 Opc = AArch64::MLAv16i8;
8867 RC = &AArch64::FPR128RegClass;
8868 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8869 break;
8871 Opc = AArch64::MLAv4i16;
8872 RC = &AArch64::FPR64RegClass;
8873 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8874 break;
8876 Opc = AArch64::MLAv4i16;
8877 RC = &AArch64::FPR64RegClass;
8878 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8879 break;
8881 Opc = AArch64::MLAv8i16;
8882 RC = &AArch64::FPR128RegClass;
8883 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8884 break;
8886 Opc = AArch64::MLAv8i16;
8887 RC = &AArch64::FPR128RegClass;
8888 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8889 break;
8891 Opc = AArch64::MLAv2i32;
8892 RC = &AArch64::FPR64RegClass;
8893 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8894 break;
8896 Opc = AArch64::MLAv2i32;
8897 RC = &AArch64::FPR64RegClass;
8898 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8899 break;
8901 Opc = AArch64::MLAv4i32;
8902 RC = &AArch64::FPR128RegClass;
8903 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8904 break;
8906 Opc = AArch64::MLAv4i32;
8907 RC = &AArch64::FPR128RegClass;
8908 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8909 break;
8910
8912 Opc = AArch64::MLAv8i8;
8913 RC = &AArch64::FPR64RegClass;
8914 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8915 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8916 RC);
8917 break;
8919 Opc = AArch64::MLSv8i8;
8920 RC = &AArch64::FPR64RegClass;
8921 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8922 break;
8924 Opc = AArch64::MLAv16i8;
8925 RC = &AArch64::FPR128RegClass;
8926 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8927 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8928 RC);
8929 break;
8931 Opc = AArch64::MLSv16i8;
8932 RC = &AArch64::FPR128RegClass;
8933 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8934 break;
8936 Opc = AArch64::MLAv4i16;
8937 RC = &AArch64::FPR64RegClass;
8938 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8939 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8940 RC);
8941 break;
8943 Opc = AArch64::MLSv4i16;
8944 RC = &AArch64::FPR64RegClass;
8945 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8946 break;
8948 Opc = AArch64::MLAv8i16;
8949 RC = &AArch64::FPR128RegClass;
8950 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8951 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8952 RC);
8953 break;
8955 Opc = AArch64::MLSv8i16;
8956 RC = &AArch64::FPR128RegClass;
8957 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8958 break;
8960 Opc = AArch64::MLAv2i32;
8961 RC = &AArch64::FPR64RegClass;
8962 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8963 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8964 RC);
8965 break;
8967 Opc = AArch64::MLSv2i32;
8968 RC = &AArch64::FPR64RegClass;
8969 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8970 break;
8972 Opc = AArch64::MLAv4i32;
8973 RC = &AArch64::FPR128RegClass;
8974 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8975 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8976 RC);
8977 break;
8979 Opc = AArch64::MLSv4i32;
8980 RC = &AArch64::FPR128RegClass;
8981 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8982 break;
8983
8985 Opc = AArch64::MLAv4i16_indexed;
8986 RC = &AArch64::FPR64RegClass;
8987 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8988 break;
8990 Opc = AArch64::MLAv4i16_indexed;
8991 RC = &AArch64::FPR64RegClass;
8992 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8993 break;
8995 Opc = AArch64::MLAv8i16_indexed;
8996 RC = &AArch64::FPR128RegClass;
8997 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8998 break;
9000 Opc = AArch64::MLAv8i16_indexed;
9001 RC = &AArch64::FPR128RegClass;
9002 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9003 break;
9005 Opc = AArch64::MLAv2i32_indexed;
9006 RC = &AArch64::FPR64RegClass;
9007 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9008 break;
9010 Opc = AArch64::MLAv2i32_indexed;
9011 RC = &AArch64::FPR64RegClass;
9012 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9013 break;
9015 Opc = AArch64::MLAv4i32_indexed;
9016 RC = &AArch64::FPR128RegClass;
9017 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9018 break;
9020 Opc = AArch64::MLAv4i32_indexed;
9021 RC = &AArch64::FPR128RegClass;
9022 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9023 break;
9024
9026 Opc = AArch64::MLAv4i16_indexed;
9027 RC = &AArch64::FPR64RegClass;
9028 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9029 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9030 RC);
9031 break;
9033 Opc = AArch64::MLSv4i16_indexed;
9034 RC = &AArch64::FPR64RegClass;
9035 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9036 break;
9038 Opc = AArch64::MLAv8i16_indexed;
9039 RC = &AArch64::FPR128RegClass;
9040 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9041 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9042 RC);
9043 break;
9045 Opc = AArch64::MLSv8i16_indexed;
9046 RC = &AArch64::FPR128RegClass;
9047 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9048 break;
9050 Opc = AArch64::MLAv2i32_indexed;
9051 RC = &AArch64::FPR64RegClass;
9052 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9053 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9054 RC);
9055 break;
9057 Opc = AArch64::MLSv2i32_indexed;
9058 RC = &AArch64::FPR64RegClass;
9059 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9060 break;
9062 Opc = AArch64::MLAv4i32_indexed;
9063 RC = &AArch64::FPR128RegClass;
9064 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9065 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9066 RC);
9067 break;
9069 Opc = AArch64::MLSv4i32_indexed;
9070 RC = &AArch64::FPR128RegClass;
9071 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9072 break;
9073
9074 // Floating Point Support
9076 Opc = AArch64::FMADDHrrr;
9077 RC = &AArch64::FPR16RegClass;
9078 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9079 break;
9081 Opc = AArch64::FMADDSrrr;
9082 RC = &AArch64::FPR32RegClass;
9083 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9084 break;
9086 Opc = AArch64::FMADDDrrr;
9087 RC = &AArch64::FPR64RegClass;
9088 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9089 break;
9090
9092 Opc = AArch64::FMADDHrrr;
9093 RC = &AArch64::FPR16RegClass;
9094 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9095 break;
9097 Opc = AArch64::FMADDSrrr;
9098 RC = &AArch64::FPR32RegClass;
9099 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9100 break;
9102 Opc = AArch64::FMADDDrrr;
9103 RC = &AArch64::FPR64RegClass;
9104 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9105 break;
9106
9108 Opc = AArch64::FMLAv1i32_indexed;
9109 RC = &AArch64::FPR32RegClass;
9110 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9111 FMAInstKind::Indexed);
9112 break;
9114 Opc = AArch64::FMLAv1i32_indexed;
9115 RC = &AArch64::FPR32RegClass;
9116 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9117 FMAInstKind::Indexed);
9118 break;
9119
9121 Opc = AArch64::FMLAv1i64_indexed;
9122 RC = &AArch64::FPR64RegClass;
9123 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9124 FMAInstKind::Indexed);
9125 break;
9127 Opc = AArch64::FMLAv1i64_indexed;
9128 RC = &AArch64::FPR64RegClass;
9129 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9130 FMAInstKind::Indexed);
9131 break;
9132
9134 RC = &AArch64::FPR64RegClass;
9135 Opc = AArch64::FMLAv4i16_indexed;
9136 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9137 FMAInstKind::Indexed);
9138 break;
9140 RC = &AArch64::FPR64RegClass;
9141 Opc = AArch64::FMLAv4f16;
9142 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9143 FMAInstKind::Accumulator);
9144 break;
9146 RC = &AArch64::FPR64RegClass;
9147 Opc = AArch64::FMLAv4i16_indexed;
9148 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9149 FMAInstKind::Indexed);
9150 break;
9152 RC = &AArch64::FPR64RegClass;
9153 Opc = AArch64::FMLAv4f16;
9154 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9155 FMAInstKind::Accumulator);
9156 break;
9157
9160 RC = &AArch64::FPR64RegClass;
9162 Opc = AArch64::FMLAv2i32_indexed;
9163 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9165 } else {
9166 Opc = AArch64::FMLAv2f32;
9167 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9169 }
9170 break;
9173 RC = &AArch64::FPR64RegClass;
9175 Opc = AArch64::FMLAv2i32_indexed;
9176 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9178 } else {
9179 Opc = AArch64::FMLAv2f32;
9180 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9182 }
9183 break;
9184
9186 RC = &AArch64::FPR128RegClass;
9187 Opc = AArch64::FMLAv8i16_indexed;
9188 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9189 FMAInstKind::Indexed);
9190 break;
9192 RC = &AArch64::FPR128RegClass;
9193 Opc = AArch64::FMLAv8f16;
9194 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9195 FMAInstKind::Accumulator);
9196 break;
9198 RC = &AArch64::FPR128RegClass;
9199 Opc = AArch64::FMLAv8i16_indexed;
9200 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9201 FMAInstKind::Indexed);
9202 break;
9204 RC = &AArch64::FPR128RegClass;
9205 Opc = AArch64::FMLAv8f16;
9206 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9207 FMAInstKind::Accumulator);
9208 break;
9209
9212 RC = &AArch64::FPR128RegClass;
9214 Opc = AArch64::FMLAv2i64_indexed;
9215 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9217 } else {
9218 Opc = AArch64::FMLAv2f64;
9219 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9221 }
9222 break;
9225 RC = &AArch64::FPR128RegClass;
9227 Opc = AArch64::FMLAv2i64_indexed;
9228 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9230 } else {
9231 Opc = AArch64::FMLAv2f64;
9232 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9234 }
9235 break;
9236
9239 RC = &AArch64::FPR128RegClass;
9241 Opc = AArch64::FMLAv4i32_indexed;
9242 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9244 } else {
9245 Opc = AArch64::FMLAv4f32;
9246 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9248 }
9249 break;
9250
9253 RC = &AArch64::FPR128RegClass;
9255 Opc = AArch64::FMLAv4i32_indexed;
9256 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9258 } else {
9259 Opc = AArch64::FMLAv4f32;
9260 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9262 }
9263 break;
9264
9266 Opc = AArch64::FNMSUBHrrr;
9267 RC = &AArch64::FPR16RegClass;
9268 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9269 break;
9271 Opc = AArch64::FNMSUBSrrr;
9272 RC = &AArch64::FPR32RegClass;
9273 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9274 break;
9276 Opc = AArch64::FNMSUBDrrr;
9277 RC = &AArch64::FPR64RegClass;
9278 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9279 break;
9280
9282 Opc = AArch64::FNMADDHrrr;
9283 RC = &AArch64::FPR16RegClass;
9284 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9285 break;
9287 Opc = AArch64::FNMADDSrrr;
9288 RC = &AArch64::FPR32RegClass;
9289 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9290 break;
9292 Opc = AArch64::FNMADDDrrr;
9293 RC = &AArch64::FPR64RegClass;
9294 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9295 break;
9296
9298 Opc = AArch64::FMSUBHrrr;
9299 RC = &AArch64::FPR16RegClass;
9300 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9301 break;
9303 Opc = AArch64::FMSUBSrrr;
9304 RC = &AArch64::FPR32RegClass;
9305 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9306 break;
9308 Opc = AArch64::FMSUBDrrr;
9309 RC = &AArch64::FPR64RegClass;
9310 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9311 break;
9312
9314 Opc = AArch64::FMLSv1i32_indexed;
9315 RC = &AArch64::FPR32RegClass;
9316 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9317 FMAInstKind::Indexed);
9318 break;
9319
9321 Opc = AArch64::FMLSv1i64_indexed;
9322 RC = &AArch64::FPR64RegClass;
9323 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9324 FMAInstKind::Indexed);
9325 break;
9326
9329 RC = &AArch64::FPR64RegClass;
9330 Register NewVR = MRI.createVirtualRegister(RC);
9331 MachineInstrBuilder MIB1 =
9332 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9333 .add(Root.getOperand(2));
9334 InsInstrs.push_back(MIB1);
9335 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9337 Opc = AArch64::FMLAv4f16;
9338 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9339 FMAInstKind::Accumulator, &NewVR);
9340 } else {
9341 Opc = AArch64::FMLAv4i16_indexed;
9342 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9343 FMAInstKind::Indexed, &NewVR);
9344 }
9345 break;
9346 }
9348 RC = &AArch64::FPR64RegClass;
9349 Opc = AArch64::FMLSv4f16;
9350 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9351 FMAInstKind::Accumulator);
9352 break;
9354 RC = &AArch64::FPR64RegClass;
9355 Opc = AArch64::FMLSv4i16_indexed;
9356 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9357 FMAInstKind::Indexed);
9358 break;
9359
9362 RC = &AArch64::FPR64RegClass;
9364 Opc = AArch64::FMLSv2i32_indexed;
9365 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9367 } else {
9368 Opc = AArch64::FMLSv2f32;
9369 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9371 }
9372 break;
9373
9376 RC = &AArch64::FPR128RegClass;
9377 Register NewVR = MRI.createVirtualRegister(RC);
9378 MachineInstrBuilder MIB1 =
9379 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9380 .add(Root.getOperand(2));
9381 InsInstrs.push_back(MIB1);
9382 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9384 Opc = AArch64::FMLAv8f16;
9385 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9386 FMAInstKind::Accumulator, &NewVR);
9387 } else {
9388 Opc = AArch64::FMLAv8i16_indexed;
9389 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9390 FMAInstKind::Indexed, &NewVR);
9391 }
9392 break;
9393 }
9395 RC = &AArch64::FPR128RegClass;
9396 Opc = AArch64::FMLSv8f16;
9397 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9398 FMAInstKind::Accumulator);
9399 break;
9401 RC = &AArch64::FPR128RegClass;
9402 Opc = AArch64::FMLSv8i16_indexed;
9403 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9404 FMAInstKind::Indexed);
9405 break;
9406
9409 RC = &AArch64::FPR128RegClass;
9411 Opc = AArch64::FMLSv2i64_indexed;
9412 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9414 } else {
9415 Opc = AArch64::FMLSv2f64;
9416 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9418 }
9419 break;
9420
9423 RC = &AArch64::FPR128RegClass;
9425 Opc = AArch64::FMLSv4i32_indexed;
9426 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9428 } else {
9429 Opc = AArch64::FMLSv4f32;
9430 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9432 }
9433 break;
9436 RC = &AArch64::FPR64RegClass;
9437 Register NewVR = MRI.createVirtualRegister(RC);
9438 MachineInstrBuilder MIB1 =
9439 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9440 .add(Root.getOperand(2));
9441 InsInstrs.push_back(MIB1);
9442 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9444 Opc = AArch64::FMLAv2i32_indexed;
9445 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9446 FMAInstKind::Indexed, &NewVR);
9447 } else {
9448 Opc = AArch64::FMLAv2f32;
9449 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9450 FMAInstKind::Accumulator, &NewVR);
9451 }
9452 break;
9453 }
9456 RC = &AArch64::FPR128RegClass;
9457 Register NewVR = MRI.createVirtualRegister(RC);
9458 MachineInstrBuilder MIB1 =
9459 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9460 .add(Root.getOperand(2));
9461 InsInstrs.push_back(MIB1);
9462 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9464 Opc = AArch64::FMLAv4i32_indexed;
9465 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9466 FMAInstKind::Indexed, &NewVR);
9467 } else {
9468 Opc = AArch64::FMLAv4f32;
9469 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9470 FMAInstKind::Accumulator, &NewVR);
9471 }
9472 break;
9473 }
9476 RC = &AArch64::FPR128RegClass;
9477 Register NewVR = MRI.createVirtualRegister(RC);
9478 MachineInstrBuilder MIB1 =
9479 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9480 .add(Root.getOperand(2));
9481 InsInstrs.push_back(MIB1);
9482 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9484 Opc = AArch64::FMLAv2i64_indexed;
9485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9486 FMAInstKind::Indexed, &NewVR);
9487 } else {
9488 Opc = AArch64::FMLAv2f64;
9489 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9490 FMAInstKind::Accumulator, &NewVR);
9491 }
9492 break;
9493 }
9496 unsigned IdxDupOp =
9497 (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
9498 : 2;
9499 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9500 &AArch64::FPR128RegClass, MRI);
9501 break;
9502 }
9505 unsigned IdxDupOp =
9506 (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
9507 : 2;
9508 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9509 &AArch64::FPR128RegClass, MRI);
9510 break;
9511 }
9514 unsigned IdxDupOp =
9515 (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
9516 : 2;
9517 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9518 &AArch64::FPR128_loRegClass, MRI);
9519 break;
9520 }
9523 unsigned IdxDupOp =
9524 (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
9525 : 2;
9526 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9527 &AArch64::FPR128RegClass, MRI);
9528 break;
9529 }
9532 unsigned IdxDupOp =
9533 (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
9534 : 2;
9535 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9536 &AArch64::FPR128_loRegClass, MRI);
9537 break;
9538 }
9540 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9541 break;
9542 }
9544 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9545 Pattern, 4);
9546 break;
9547 }
9549 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9550 Pattern, 8);
9551 break;
9552 }
9554 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9555 Pattern, 16);
9556 break;
9557 }
9558
9559 } // end switch (Pattern)
9560 // Record MUL and ADD/SUB for deletion
9561 if (MUL)
9562 DelInstrs.push_back(MUL);
9563 DelInstrs.push_back(&Root);
9564
9565 // Set the flags on the inserted instructions to be the merged flags of the
9566 // instructions that we have combined.
9567 uint32_t Flags = Root.getFlags();
9568 if (MUL)
9569 Flags = Root.mergeFlagsWith(*MUL);
9570 for (auto *MI : InsInstrs)
9571 MI->setFlags(Flags);
9572}
9573
9574/// Replace csincr-branch sequence by simple conditional branch
9575///
9576/// Examples:
9577/// 1. \code
9578/// csinc w9, wzr, wzr, <condition code>
9579/// tbnz w9, #0, 0x44
9580/// \endcode
9581/// to
9582/// \code
9583/// b.<inverted condition code>
9584/// \endcode
9585///
9586/// 2. \code
9587/// csinc w9, wzr, wzr, <condition code>
9588/// tbz w9, #0, 0x44
9589/// \endcode
9590/// to
9591/// \code
9592/// b.<condition code>
9593/// \endcode
9594///
9595/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9596/// compare's constant operand is power of 2.
9597///
9598/// Examples:
9599/// \code
9600/// and w8, w8, #0x400
9601/// cbnz w8, L1
9602/// \endcode
9603/// to
9604/// \code
9605/// tbnz w8, #10, L1
9606/// \endcode
9607///
9608/// \param MI Conditional Branch
9609/// \return True when the simple conditional branch is generated
9610///
9611 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9612 bool IsNegativeBranch = false;
9613 bool IsTestAndBranch = false;
9614 unsigned TargetBBInMI = 0;
9615 switch (MI.getOpcode()) {
9616 default:
9617 llvm_unreachable("Unknown branch instruction?");
9618 case AArch64::Bcc:
9619 case AArch64::CBWPri:
9620 case AArch64::CBXPri:
9621 case AArch64::CBBAssertExt:
9622 case AArch64::CBHAssertExt:
9623 case AArch64::CBWPrr:
9624 case AArch64::CBXPrr:
9625 return false;
9626 case AArch64::CBZW:
9627 case AArch64::CBZX:
9628 TargetBBInMI = 1;
9629 break;
9630 case AArch64::CBNZW:
9631 case AArch64::CBNZX:
9632 TargetBBInMI = 1;
9633 IsNegativeBranch = true;
9634 break;
9635 case AArch64::TBZW:
9636 case AArch64::TBZX:
9637 TargetBBInMI = 2;
9638 IsTestAndBranch = true;
9639 break;
9640 case AArch64::TBNZW:
9641 case AArch64::TBNZX:
9642 TargetBBInMI = 2;
9643 IsNegativeBranch = true;
9644 IsTestAndBranch = true;
9645 break;
9646 }
9647 // So we increment a zero register and test for bits other
9648 // than bit 0? Conservatively bail out in case the verifier
9649 // missed this case.
9650 if (IsTestAndBranch && MI.getOperand(1).getImm())
9651 return false;
9652
9653 // Find Definition.
9654 assert(MI.getParent() && "Incomplete machine instruction\n");
9655 MachineBasicBlock *MBB = MI.getParent();
9656 MachineFunction *MF = MBB->getParent();
9657 MachineRegisterInfo *MRI = &MF->getRegInfo();
9658 Register VReg = MI.getOperand(0).getReg();
9659 if (!VReg.isVirtual())
9660 return false;
9661
9662 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9663
9664 // Look through COPY instructions to find definition.
9665 while (DefMI->isCopy()) {
9666 Register CopyVReg = DefMI->getOperand(1).getReg();
9667 if (!MRI->hasOneNonDBGUse(CopyVReg))
9668 return false;
9669 if (!MRI->hasOneDef(CopyVReg))
9670 return false;
9671 DefMI = MRI->getVRegDef(CopyVReg);
9672 }
9673
9674 switch (DefMI->getOpcode()) {
9675 default:
9676 return false;
9677 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9678 case AArch64::ANDWri:
9679 case AArch64::ANDXri: {
9680 if (IsTestAndBranch)
9681 return false;
9682 if (DefMI->getParent() != MBB)
9683 return false;
9684 if (!MRI->hasOneNonDBGUse(VReg))
9685 return false;
9686
9687 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9688 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9689 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9690 if (!isPowerOf2_64(Mask))
9691 return false;
9692
9693 MachineOperand &MO = DefMI->getOperand(1);
9694 Register NewReg = MO.getReg();
9695 if (!NewReg.isVirtual())
9696 return false;
9697
9698 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9699
9700 MachineBasicBlock &RefToMBB = *MBB;
9701 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9702 DebugLoc DL = MI.getDebugLoc();
9703 unsigned Imm = Log2_64(Mask);
9704 unsigned Opc = (Imm < 32)
9705 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9706 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9707 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9708 .addReg(NewReg)
9709 .addImm(Imm)
9710 .addMBB(TBB);
9711 // Register lives on to the TBZ/TBNZ now.
9712 MO.setIsKill(false);
9713
9714 // For a bit position smaller than 32, we need to use the 32-bit
9715 // variant (W) in all cases: the 64-bit variant cannot encode
9716 // those positions.
9717 // Therefore, if the input register is 64-bit, we need to take its
9718 // 32-bit sub-register.
9719 if (!Is32Bit && Imm < 32)
9720 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9721 MI.eraseFromParent();
9722 return true;
9723 }
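// For illustration (arbitrary registers): "and x8, x8, #0x400" followed by
// "cbnz x8, L1" becomes "tbnz w8, #10, L1" on the 32-bit sub-register,
// while a mask of 0x100000000 (bit 32) would instead become
// "tbnz x8, #32, L1" using the X-form.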
9724 // Look for CSINC
9725 case AArch64::CSINCWr:
9726 case AArch64::CSINCXr: {
9727 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9728 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9729 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9730 DefMI->getOperand(2).getReg() == AArch64::XZR))
9731 return false;
9732
9733 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9734 true) != -1)
9735 return false;
9736
9737 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9738 // Convert only when the condition code is not modified between
9739 // the CSINC and the branch. The CC may be used by other
9740 // instructions in between.
9742 return false;
9743 MachineBasicBlock &RefToMBB = *MBB;
9744 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9745 DebugLoc DL = MI.getDebugLoc();
9746 if (IsNegativeBranch)
9747 CC = AArch64CC::getInvertedCondCode(CC);
9748 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9749 MI.eraseFromParent();
9750 return true;
9751 }
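// For illustration (arbitrary registers):
//   csinc w9, wzr, wzr, ne // w9 == 1 exactly when EQ holds
//   cbnz w9, L1
// becomes
//   b.eq L1
// since CBNZ is a negative branch, so the condition is inverted above.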
9752 }
9753}
9754
9755std::pair<unsigned, unsigned>
9756AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9757 const unsigned Mask = AArch64II::MO_FRAGMENT;
9758 return std::make_pair(TF & Mask, TF & ~Mask);
9759}
9760
9761 ArrayRef<std::pair<unsigned, const char *>>
9762 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9763 using namespace AArch64II;
9764
9765 static const std::pair<unsigned, const char *> TargetFlags[] = {
9766 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9767 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9768 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9769 {MO_HI12, "aarch64-hi12"}};
9770 return ArrayRef(TargetFlags);
9771}
9772
9773 ArrayRef<std::pair<unsigned, const char *>>
9774 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9775 using namespace AArch64II;
9776
9777 static const std::pair<unsigned, const char *> TargetFlags[] = {
9778 {MO_COFFSTUB, "aarch64-coffstub"},
9779 {MO_GOT, "aarch64-got"},
9780 {MO_NC, "aarch64-nc"},
9781 {MO_S, "aarch64-s"},
9782 {MO_TLS, "aarch64-tls"},
9783 {MO_DLLIMPORT, "aarch64-dllimport"},
9784 {MO_PREL, "aarch64-prel"},
9785 {MO_TAGGED, "aarch64-tagged"},
9786 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9787 };
9788 return ArrayRef(TargetFlags);
9789}
9790
9791 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
9792 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9793 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9794 {{MOSuppressPair, "aarch64-suppress-pair"},
9795 {MOStridedAccess, "aarch64-strided-access"}};
9796 return ArrayRef(TargetFlags);
9797}
9798
9799/// Constants defining how certain sequences should be outlined.
9800/// This encompasses how an outlined function should be called, and what kind of
9801/// frame should be emitted for that outlined function.
9802///
9803/// \p MachineOutlinerDefault implies that the function should be called with
9804/// a save and restore of LR to the stack.
9805///
9806/// That is,
9807///
9808/// I1 Save LR OUTLINED_FUNCTION:
9809/// I2 --> BL OUTLINED_FUNCTION I1
9810/// I3 Restore LR I2
9811/// I3
9812/// RET
9813///
9814/// * Call construction overhead: 3 (save + BL + restore)
9815/// * Frame construction overhead: 1 (ret)
9816/// * Requires stack fixups? Yes
9817///
9818/// \p MachineOutlinerTailCall implies that the function is being created from
9819/// a sequence of instructions ending in a return.
9820///
9821/// That is,
9822///
9823/// I1 OUTLINED_FUNCTION:
9824/// I2 --> B OUTLINED_FUNCTION I1
9825/// RET I2
9826/// RET
9827///
9828/// * Call construction overhead: 1 (B)
9829/// * Frame construction overhead: 0 (Return included in sequence)
9830/// * Requires stack fixups? No
9831///
9832/// \p MachineOutlinerNoLRSave implies that the function should be called using
9833/// a BL instruction, but doesn't require LR to be saved and restored. This
9834/// happens when LR is known to be dead.
9835///
9836/// That is,
9837///
9838/// I1 OUTLINED_FUNCTION:
9839/// I2 --> BL OUTLINED_FUNCTION I1
9840/// I3 I2
9841/// I3
9842/// RET
9843///
9844/// * Call construction overhead: 1 (BL)
9845/// * Frame construction overhead: 1 (RET)
9846/// * Requires stack fixups? No
9847///
9848/// \p MachineOutlinerThunk implies that the function is being created from
9849/// a sequence of instructions ending in a call. The outlined function is
9850/// called with a BL instruction, and the outlined function tail-calls the
9851/// original call destination.
9852///
9853/// That is,
9854///
9855/// I1 OUTLINED_FUNCTION:
9856/// I2 --> BL OUTLINED_FUNCTION I1
9857/// BL f I2
9858/// B f
9859/// * Call construction overhead: 1 (BL)
9860/// * Frame construction overhead: 0
9861/// * Requires stack fixups? No
9862///
9863/// \p MachineOutlinerRegSave implies that the function should be called with a
9864/// save and restore of LR to an available register. This allows us to avoid
9865/// stack fixups. Note that this outlining variant is compatible with the
9866/// NoLRSave case.
9867///
9868/// That is,
9869///
9870/// I1 Save LR OUTLINED_FUNCTION:
9871/// I2 --> BL OUTLINED_FUNCTION I1
9872/// I3 Restore LR I2
9873/// I3
9874/// RET
9875///
9876/// * Call construction overhead: 3 (save + BL + restore)
9877/// * Frame construction overhead: 1 (ret)
9878/// * Requires stack fixups? No
9879 enum MachineOutlinerClass {
9880 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9881 MachineOutlinerTailCall, /// Only emit a branch.
9882 MachineOutlinerNoLRSave, /// Emit a call and return.
9883 MachineOutlinerThunk, /// Emit a call and tail-call.
9884 MachineOutlinerRegSave /// Same as default, but save to a register.
9885};
9886 
9887 enum MachineOutlinerMBBFlags {
9888 LRUnavailableSomewhere = 0x2,
9889 HasCalls = 0x4,
9890 UnsafeRegsDead = 0x8
9891 };
9892 
9893 Register
9894 AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9895 MachineFunction *MF = C.getMF();
9896 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9897 const AArch64RegisterInfo *ARI =
9898 static_cast<const AArch64RegisterInfo *>(&TRI);
9899 // Check if there is an available register across the sequence that we can
9900 // use.
9901 for (unsigned Reg : AArch64::GPR64RegClass) {
9902 if (!ARI->isReservedReg(*MF, Reg) &&
9903 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9904 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9905 Reg != AArch64::X17 && // Ditto for X17.
9906 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9907 C.isAvailableInsideSeq(Reg, TRI))
9908 return Reg;
9909 }
9910 return Register();
9911}
9912
9913static bool
9914 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9915 const outliner::Candidate &b) {
9916 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9917 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9918
9919 return MFIa->getSignReturnAddressCondition() ==
9920 MFIb->getSignReturnAddressCondition();
9921 }
9922
9923static bool
9924 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9925 const outliner::Candidate &b) {
9926 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9927 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9928
9929 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9930}
9931
9932 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9933 const outliner::Candidate &b) {
9934 const AArch64Subtarget &SubtargetA =
9935 a.getMF()->getSubtarget<AArch64Subtarget>();
9936 const AArch64Subtarget &SubtargetB =
9937 b.getMF()->getSubtarget<AArch64Subtarget>();
9938 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9939}
9940
9941std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9942AArch64InstrInfo::getOutliningCandidateInfo(
9943 const MachineModuleInfo &MMI,
9944 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9945 unsigned MinRepeats) const {
9946 unsigned SequenceSize = 0;
9947 for (auto &MI : RepeatedSequenceLocs[0])
9948 SequenceSize += getInstSizeInBytes(MI);
9949
9950 unsigned NumBytesToCreateFrame = 0;
9951
9952 // Avoid splitting ADRP ADD/LDR pair into outlined functions.
9953 // These instructions are fused together by the scheduler.
9954 // Any candidate where ADRP is the last instruction should be rejected
9955 // as that will lead to splitting ADRP pair.
9956 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
9957 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
9958 if (LastMI.getOpcode() == AArch64::ADRP &&
9959 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
9960 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9961 return std::nullopt;
9962 }
9963
9964 // Similarly any candidate where the first instruction is ADD/LDR with a
9965 // page offset should be rejected to avoid ADRP splitting.
9966 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
9967 FirstMI.getOpcode() == AArch64::LDRXui) &&
9968 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
9969 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9970 return std::nullopt;
9971 }
9972
9973 // We only allow outlining for functions having exactly matching return
9974 // address signing attributes, i.e., all share the same value for the
9975 // attribute "sign-return-address" and all share the same type of key they
9976 // are signed with.
9977 // Additionally we require all functions to simultaneously either support
9978 // v8.3a features or not. Otherwise an outlined function could get signed
9979 // using dedicated v8.3 instructions and a call from a function that doesn't
9980 // support v8.3 instructions would therefore be invalid.
9981 if (std::adjacent_find(
9982 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9983 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9984 // Return true if a and b are non-equal w.r.t. return address
9985 // signing or support of v8.3a features
9986 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9987 outliningCandidatesSigningKeyConsensus(a, b) &&
9988 outliningCandidatesV8_3OpsConsensus(a, b)) {
9989 return false;
9990 }
9991 return true;
9992 }) != RepeatedSequenceLocs.end()) {
9993 return std::nullopt;
9994 }
9995
9996 // Since at this point all candidates agree on their return address signing
9997 // picking just one is fine. If the candidate functions potentially sign their
9998 // return addresses, the outlined function should do the same. Note that in
9999 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
10000 // not certainly true that the outlined function will have to sign its return
10001 // address but this decision is made later, when the decision to outline
10002 // has already been made.
10003 // The same holds for the number of additional instructions we need: On
10004 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
10005 // necessary. However, at this point we don't know if the outlined function
10006 // will have a RET instruction so we assume the worst.
10007 const TargetRegisterInfo &TRI = getRegisterInfo();
10008 // Performing a tail call may require extra checks when PAuth is enabled.
10009 // If PAuth is disabled, set it to zero for uniformity.
10010 unsigned NumBytesToCheckLRInTCEpilogue = 0;
10011 const auto RASignCondition = RepeatedSequenceLocs[0]
10012 .getMF()
10013 ->getInfo<AArch64FunctionInfo>()
10014 ->getSignReturnAddressCondition();
10015 if (RASignCondition != SignReturnAddress::None) {
10016 // One PAC and one AUT instructions
10017 NumBytesToCreateFrame += 8;
10018
10019 // PAuth is enabled - set extra tail call cost, if any.
10020 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
10021 *RepeatedSequenceLocs[0].getMF());
10022 NumBytesToCheckLRInTCEpilogue =
10023 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
10024 // Checking the authenticated LR value may significantly impact
10025 // SequenceSize, so account for it for more precise results.
10026 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
10027 SequenceSize += NumBytesToCheckLRInTCEpilogue;
10028
10029 // We have to check whether SP-modifying instructions would get outlined.
10030 // If so, we only allow outlining if SP is unchanged overall: matching
10031 // sub and add instructions are okay to outline, while all other SP
10032 // modifications are not.
10033 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
10034 int SPValue = 0;
10035 for (auto &MI : C) {
10036 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
10037 switch (MI.getOpcode()) {
10038 case AArch64::ADDXri:
10039 case AArch64::ADDWri:
10040 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10041 assert(MI.getOperand(2).isImm() &&
10042 "Expected operand to be immediate");
10043 assert(MI.getOperand(1).isReg() &&
10044 "Expected operand to be a register");
10045 // Check if the add just increments sp. If so, we search for
10046 // matching sub instructions that decrement sp. If not, the
10047 // modification is illegal
10048 if (MI.getOperand(1).getReg() == AArch64::SP)
10049 SPValue += MI.getOperand(2).getImm();
10050 else
10051 return true;
10052 break;
10053 case AArch64::SUBXri:
10054 case AArch64::SUBWri:
10055 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10056 assert(MI.getOperand(2).isImm() &&
10057 "Expected operand to be immediate");
10058 assert(MI.getOperand(1).isReg() &&
10059 "Expected operand to be a register");
10060 // Check if the sub just decrements sp. If so, we search for
10061 // matching add instructions that increment sp. If not, the
10062 // modification is illegal
10063 if (MI.getOperand(1).getReg() == AArch64::SP)
10064 SPValue -= MI.getOperand(2).getImm();
10065 else
10066 return true;
10067 break;
10068 default:
10069 return true;
10070 }
10071 }
10072 }
10073 if (SPValue)
10074 return true;
10075 return false;
10076 };
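// For illustration: a candidate containing the balanced pair
//   sub sp, sp, #32 ... add sp, sp, #32
// nets SPValue back to zero and is kept, whereas an unmatched
// "add sp, sp, #16" (or any other kind of write to SP) causes the
// candidate to be dropped.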
10077 // Remove candidates with illegal stack modifying instructions
10078 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
10079
10080 // If the sequence doesn't have enough candidates left, then we're done.
10081 if (RepeatedSequenceLocs.size() < MinRepeats)
10082 return std::nullopt;
10083 }
10084
10085 // Properties about candidate MBBs that hold for all of them.
10086 unsigned FlagsSetInAll = 0xF;
10087
10088 // Compute liveness information for each candidate, and set FlagsSetInAll.
10089 for (outliner::Candidate &C : RepeatedSequenceLocs)
10090 FlagsSetInAll &= C.Flags;
10091
10092 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
10093
10094 // Helper lambda which sets call information for every candidate.
10095 auto SetCandidateCallInfo =
10096 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
10097 for (outliner::Candidate &C : RepeatedSequenceLocs)
10098 C.setCallInfo(CallID, NumBytesForCall);
10099 };
10100
10101 unsigned FrameID = MachineOutlinerDefault;
10102 NumBytesToCreateFrame += 4;
10103
10104 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
10105 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
10106 });
10107
10108 // We check to see if CFI Instructions are present, and if they are
10109 // we find the number of CFI Instructions in the candidates.
10110 unsigned CFICount = 0;
10111 for (auto &I : RepeatedSequenceLocs[0]) {
10112 if (I.isCFIInstruction())
10113 CFICount++;
10114 }
10115
10116 // We compare the number of found CFI Instructions to the number of CFI
10117 // instructions in the parent function for each candidate. We must check this
10118 // since if we outline one of the CFI instructions in a function, we have to
10119 // outline them all for correctness. If we do not, the address offsets will be
10120 // incorrect between the two sections of the program.
10121 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10122 std::vector<MCCFIInstruction> CFIInstructions =
10123 C.getMF()->getFrameInstructions();
10124
10125 if (CFICount > 0 && CFICount != CFIInstructions.size())
10126 return std::nullopt;
10127 }
10128
10129 // Returns true if an instruction is safe to fix up, false otherwise.
10130 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10131 if (MI.isCall())
10132 return true;
10133
10134 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10135 !MI.readsRegister(AArch64::SP, &TRI))
10136 return true;
10137
10138 // Any modification of SP will break our code to save/restore LR.
10139 // FIXME: We could handle some instructions which add a constant
10140 // offset to SP, with a bit more work.
10141 if (MI.modifiesRegister(AArch64::SP, &TRI))
10142 return false;
10143
10144 // At this point, we have a stack instruction that we might need to
10145 // fix up. We'll handle it if it's a load or store.
10146 if (MI.mayLoadOrStore()) {
10147 const MachineOperand *Base; // Filled with the base operand of MI.
10148 int64_t Offset; // Filled with the offset of MI.
10149 bool OffsetIsScalable;
10150
10151 // Does it allow us to offset the base operand and is the base the
10152 // register SP?
10153 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10154 !Base->isReg() || Base->getReg() != AArch64::SP)
10155 return false;
10156
10157 // Fix-up code below assumes bytes.
10158 if (OffsetIsScalable)
10159 return false;
10160
10161 // Find the minimum/maximum offset for this instruction and check
10162 // if fixing it up would be in range.
10163 int64_t MinOffset,
10164 MaxOffset; // Unscaled offsets for the instruction.
10165 // The scale to multiply the offsets by.
10166 TypeSize Scale(0U, false), DummyWidth(0U, false);
10167 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10168
10169 Offset += 16; // Update the offset to what it would be if we outlined.
10170 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10171 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10172 return false;
10173
10174 // It's in range, so we can outline it.
10175 return true;
10176 }
10177
10178 // FIXME: Add handling for instructions like "add x0, sp, #8".
10179
10180 // We can't fix it up, so don't outline it.
10181 return false;
10182 };
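// For illustration: an access such as "ldr x0, [sp, #8]" inside the
// outlined body effectively becomes an access at sp+24 once the 16-byte
// LR spill is in place, so it can only be outlined if offset 24 still
// fits the instruction's scaled-offset range checked above.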
10183
10184 // True if it's possible to fix up each stack instruction in this sequence.
10185 // Important for frames/call variants that modify the stack.
10186 bool AllStackInstrsSafe =
10187 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10188
10189 // If the last instruction in any candidate is a terminator, then we should
10190 // tail call all of the candidates.
10191 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10192 FrameID = MachineOutlinerTailCall;
10193 NumBytesToCreateFrame = 0;
10194 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10195 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10196 }
10197
10198 else if (LastInstrOpcode == AArch64::BL ||
10199 ((LastInstrOpcode == AArch64::BLR ||
10200 LastInstrOpcode == AArch64::BLRNoIP) &&
10201 !HasBTI)) {
10202 // FIXME: Do we need to check if the code after this uses the value of LR?
10203 FrameID = MachineOutlinerThunk;
10204 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10205 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10206 }
10207
10208 else {
10209 // We need to decide how to emit calls + frames. We can always emit the same
10210 // frame if we don't need to save to the stack. If we have to save to the
10211 // stack, then we need a different frame.
10212 unsigned NumBytesNoStackCalls = 0;
10213 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10214
10215 // Check if we have to save LR.
10216 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10217 bool LRAvailable =
10218 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
10219 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10220 : true;
10221 // If we have a noreturn caller, then we're going to be conservative and
10222 // say that we have to save LR. If we don't have a ret at the end of the
10223 // block, then we can't reason about liveness accurately.
10224 //
10225 // FIXME: We can probably do better than always disabling this in
10226 // noreturn functions by fixing up the liveness info.
10227 bool IsNoReturn =
10228 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10229
10230 // Is LR available? If so, we don't need a save.
10231 if (LRAvailable && !IsNoReturn) {
10232 NumBytesNoStackCalls += 4;
10233 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10234 CandidatesWithoutStackFixups.push_back(C);
10235 }
10236
10237 // Is an unused register available? If so, we won't modify the stack, so
10238 // we can outline with the same frame type as those that don't save LR.
10239 else if (findRegisterToSaveLRTo(C)) {
10240 NumBytesNoStackCalls += 12;
10241 C.setCallInfo(MachineOutlinerRegSave, 12);
10242 CandidatesWithoutStackFixups.push_back(C);
10243 }
10244
10245 // Is SP used in the sequence at all? If not, we don't have to modify
10246 // the stack, so we are guaranteed to get the same frame.
10247 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10248 NumBytesNoStackCalls += 12;
10249 C.setCallInfo(MachineOutlinerDefault, 12);
10250 CandidatesWithoutStackFixups.push_back(C);
10251 }
10252
10253 // If we outline this, we need to modify the stack. Pretend we don't
10254 // outline this by saving all of its bytes.
10255 else {
10256 NumBytesNoStackCalls += SequenceSize;
10257 }
10258 }
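// At this point NumBytesNoStackCalls is the total call overhead if every
// candidate could avoid a stack-based LR save (4 or 12 bytes per call
// site, or the full SequenceSize where that is impossible). The
// comparison against RepeatedSequenceLocs.size() * 12 below decides
// whether those no-stack call variants are the cheaper choice overall.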
10259
10260 // If there are no places where we have to save LR, then note that we
10261 // don't have to update the stack. Otherwise, give every candidate the
10262 // default call type, as long as it's safe to do so.
10263 if (!AllStackInstrsSafe ||
10264 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10265 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10266 FrameID = MachineOutlinerNoLRSave;
10267 if (RepeatedSequenceLocs.size() < MinRepeats)
10268 return std::nullopt;
10269 } else {
10270 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10271
10272 // Bugzilla ID: 46767
10273 // TODO: Check if fixing up the stack more than once is safe so we can
10274 // outline these.
10275 //
10276 // An outline resulting in a caller that requires stack fixups at the
10277 // callsite to a callee that also requires stack fixups can happen when
10278 // there are no available registers at the candidate callsite for a
10279 // candidate that itself also has calls.
10280 //
10281 // In other words, if function_containing_sequence in the following pseudo
10282 // assembly requires that we save LR at the point of the call, but there
10283 // are no available registers, then we save using SP and as a result the
10284 // SP offsets require stack fixups by multiples of 16.
10285 //
10286 // function_containing_sequence:
10287 // ...
10288 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10289 // call OUTLINED_FUNCTION_N
10290 // restore LR from SP
10291 // ...
10292 //
10293 // OUTLINED_FUNCTION_N:
10294 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10295 // ...
10296 // bl foo
10297 // restore LR from SP
10298 // ret
10299 //
10300 // Because the code to handle more than one stack fixup does not
10301 // currently have the proper checks for legality, these cases will assert
10302 // in the AArch64 MachineOutliner. This is because the code to do this
10303 // needs more hardening, testing, better checks that generated code is
10304 // legal, etc and because it is only verified to handle a single pass of
10305 // stack fixup.
10306 //
10307 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10308 // these cases until they are known to be handled. Bugzilla 46767 is
10309 // referenced in comments at the assert site.
10310 //
10311 // To avoid asserting (or generating non-legal code on noassert builds)
10312 // we remove all candidates which would need more than one stack fixup by
10313 // pruning the cases where the candidate has calls while also having no
10314 // available LR and having no available general purpose registers to copy
10315 // LR to (ie one extra stack save/restore).
10316 //
10317 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10318 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10319 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10320 return (llvm::any_of(C, IsCall)) &&
10321 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10322 !findRegisterToSaveLRTo(C));
10323 });
10324 }
10325 }
10326
10327 // If we dropped all of the candidates, bail out here.
10328 if (RepeatedSequenceLocs.size() < MinRepeats)
10329 return std::nullopt;
10330 }
10331
10332 // Does every candidate's MBB contain a call? If so, then we might have a call
10333 // in the range.
10334 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10335 // Check if the range contains a call. These require a save + restore of the
10336 // link register.
10337 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10338 bool ModStackToSaveLR = false;
10339 if (any_of(drop_end(FirstCand),
10340 [](const MachineInstr &MI) { return MI.isCall(); }))
10341 ModStackToSaveLR = true;
10342
10343 // Handle the last instruction separately. If this is a tail call, then the
10344 // last instruction is a call. We don't want to save + restore in this case.
10345 // However, it could be possible that the last instruction is a call without
10346 // it being valid to tail call this sequence. We should consider this as
10347 // well.
10348 else if (FrameID != MachineOutlinerThunk &&
10349 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10350 ModStackToSaveLR = true;
10351
10352 if (ModStackToSaveLR) {
10353 // We can't fix up the stack. Bail out.
10354 if (!AllStackInstrsSafe)
10355 return std::nullopt;
10356
10357 // Save + restore LR.
10358 NumBytesToCreateFrame += 8;
10359 }
10360 }
10361
10362 // If we have CFI instructions, we can only outline if the outlined section
10363 // can be a tail call
10364 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10365 return std::nullopt;
10366
10367 return std::make_unique<outliner::OutlinedFunction>(
10368 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10369}
10370
10371void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10372 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10373 // If a bunch of candidates reach this point they must agree on their return
10374 // address signing. It is therefore enough to just consider the signing
10375 // behaviour of one of them
10376 const auto &CFn = Candidates.front().getMF()->getFunction();
10377
10378 if (CFn.hasFnAttribute("ptrauth-returns"))
10379 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10380 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10381 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10382 // Since all candidates belong to the same module, just copy the
10383 // function-level attributes of an arbitrary function.
10384 if (CFn.hasFnAttribute("sign-return-address"))
10385 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10386 if (CFn.hasFnAttribute("sign-return-address-key"))
10387 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10388
10389 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10390}
10391
10392bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10393 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10394 const Function &F = MF.getFunction();
10395
10396 // Can F be deduplicated by the linker? If it can, don't outline from it.
10397 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10398 return false;
10399
10400 // Don't outline from functions with section markings; the program could
10401 // expect that all the code is in the named section.
10402 // FIXME: Allow outlining from multiple functions with the same section
10403 // marking.
10404 if (F.hasSection())
10405 return false;
10406
10407 // Outlining from functions with redzones is unsafe since the outliner may
10408 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10409 // outline from it.
10410 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10411 if (!AFI || AFI->hasRedZone().value_or(true))
10412 return false;
10413
10414 // FIXME: Determine whether it is safe to outline from functions which contain
10415 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10416 // outlined together and ensure it is safe to outline with async unwind info,
10417 // required for saving & restoring VG around calls.
10418 if (AFI->hasStreamingModeChanges())
10419 return false;
10420
10421 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10422 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
10423 return false;
10424
10425 // It's safe to outline from MF.
10426 return true;
10427}
10428
10430 AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10431 unsigned &Flags) const {
10432 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
10433 "Must track liveness!");
10434 SmallVector<
10435 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10436 Ranges;
10437 // According to the AArch64 Procedure Call Standard, the following are
10438 // undefined on entry/exit from a function call:
10439 //
10440 // * Registers x16, x17, (and thus w16, w17)
10441 // * Condition codes (and thus the NZCV register)
10442 //
10443 // If any of these registers are used inside or live across an outlined
10444 // function, then they may be modified later, either by the compiler or
10445 // some other tool (like the linker).
10446 //
10447 // To avoid outlining in these situations, partition each block into ranges
10448 // where these registers are dead. We will only outline from those ranges.
10449 LiveRegUnits LRU(getRegisterInfo());
10450 auto AreAllUnsafeRegsDead = [&LRU]() {
10451 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10452 LRU.available(AArch64::NZCV);
10453 };
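// For illustration: if a block uses X16 somewhere in its middle, the
// partitioning below yields separate ranges before and after the region
// where X16 is live, so no outlining candidate straddles it.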
10454
10455 // We need to know if LR is live across an outlining boundary later on in
10456 // order to decide how we'll create the outlined call, frame, etc.
10457 //
10458 // It's pretty expensive to check this for *every candidate* within a block.
10459 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10460 // to compute liveness from the end of the block for O(n) candidates within
10461 // the block.
10462 //
10463 // So, to improve the average case, let's keep track of liveness from the end
10464 // of the block to the beginning of *every outlinable range*. If we know that
10465 // LR is available in every range we could outline from, then we know that
10466 // we don't need to check liveness for any candidate within that range.
10467 bool LRAvailableEverywhere = true;
10468 // Compute liveness bottom-up.
10469 LRU.addLiveOuts(MBB);
10470 // Update flags that require info about the entire MBB.
10471 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10472 if (MI.isCall() && !MI.isTerminator())
10473 Flags |= MachineOutlinerMBBFlags::HasCalls;
10474 };
10475 // Range: [RangeBegin, RangeEnd)
10476 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10477 unsigned RangeLen;
10478 auto CreateNewRangeStartingAt =
10479 [&RangeBegin, &RangeEnd,
10480 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10481 RangeBegin = NewBegin;
10482 RangeEnd = std::next(RangeBegin);
10483 RangeLen = 0;
10484 };
10485 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10486 // At least one unsafe register is not dead. We do not want to outline at
10487 // this point. If it is long enough to outline from and does not cross a
10488 // bundle boundary, save the range [RangeBegin, RangeEnd).
10489 if (RangeLen <= 1)
10490 return;
10491 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10492 return;
10493 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10494 return;
10495 Ranges.emplace_back(RangeBegin, RangeEnd);
10496 };
10497 // Find the first point where all unsafe registers are dead.
10498 // FIND: <safe instr> <-- end of first potential range
10499 // SKIP: <unsafe def>
10500 // SKIP: ... everything between ...
10501 // SKIP: <unsafe use>
10502 auto FirstPossibleEndPt = MBB.instr_rbegin();
10503 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10504 LRU.stepBackward(*FirstPossibleEndPt);
10505 // Update flags that impact how we outline across the entire block,
10506 // regardless of safety.
10507 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10508 if (AreAllUnsafeRegsDead())
10509 break;
10510 }
10511 // If we exhausted the entire block, we have no safe ranges to outline.
10512 if (FirstPossibleEndPt == MBB.instr_rend())
10513 return Ranges;
10514 // Current range.
10515 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10516 // StartPt points to the first place where all unsafe registers
10517 // are dead (if there is any such point). Begin partitioning the MBB into
10518 // ranges.
10519 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10520 LRU.stepBackward(MI);
10521 UpdateWholeMBBFlags(MI);
10522 if (!AreAllUnsafeRegsDead()) {
10523 SaveRangeIfNonEmpty();
10524 CreateNewRangeStartingAt(MI.getIterator());
10525 continue;
10526 }
10527 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10528 RangeBegin = MI.getIterator();
10529 ++RangeLen;
10530 }
10531 // Above loop misses the last (or only) range. If we are still safe, then
10532 // let's save the range.
10533 if (AreAllUnsafeRegsDead())
10534 SaveRangeIfNonEmpty();
10535 if (Ranges.empty())
10536 return Ranges;
10537 // We found the ranges bottom-up, but the mapping expects them top-down.
10538 // Reverse the order.
10539 std::reverse(Ranges.begin(), Ranges.end());
10540 // If there is at least one outlinable range where LR is unavailable
10541 // somewhere, remember that.
10542 if (!LRAvailableEverywhere)
10543 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10544 return Ranges;
10545}
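// Editorial note (illustrative only, not part of the upstream source): if the
// outlining-unsafe registers are live only across a short stretch in the
// middle of a block, the walk above produces one range below and one range
// above that stretch, returned in top-down order. Outlining candidates are
// then taken only from inside those ranges, and when LR was available
// throughout all of them the per-candidate liveness checks can be skipped.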
10546
10547outliner::InstrType
10548AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10549 MachineBasicBlock::iterator &MIT,
10550 unsigned Flags) const {
10551 MachineInstr &MI = *MIT;
10552
10553 // Don't outline anything used for return address signing. The outlined
10554 // function will get signed later if needed
10555 switch (MI.getOpcode()) {
10556 case AArch64::PACM:
10557 case AArch64::PACIASP:
10558 case AArch64::PACIBSP:
10559 case AArch64::PACIASPPC:
10560 case AArch64::PACIBSPPC:
10561 case AArch64::AUTIASP:
10562 case AArch64::AUTIBSP:
10563 case AArch64::AUTIASPPCi:
10564 case AArch64::AUTIASPPCr:
10565 case AArch64::AUTIBSPPCi:
10566 case AArch64::AUTIBSPPCr:
10567 case AArch64::RETAA:
10568 case AArch64::RETAB:
10569 case AArch64::RETAASPPCi:
10570 case AArch64::RETAASPPCr:
10571 case AArch64::RETABSPPCi:
10572 case AArch64::RETABSPPCr:
10573 case AArch64::EMITBKEY:
10574 case AArch64::PAUTH_PROLOGUE:
10575 case AArch64::PAUTH_EPILOGUE:
10576 return outliner::InstrType::Illegal;
10577 }
10578
10579 // We can only outline these if we will tail call the outlined function, or
10580 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10581 // in a tail call.
10582 //
10583 // FIXME: If the proper fixups for the offset are implemented, this should be
10584 // possible.
10585 if (MI.isCFIInstruction())
10586 return outliner::InstrType::Legal;
10587
10588 // Is this a terminator for a basic block?
10589 if (MI.isTerminator())
10590 // TargetInstrInfo::getOutliningType has already filtered out anything
10591 // that would break this, so we can allow it here.
10592 return outliner::InstrType::Legal;
10593
10594 // Make sure none of the operands are un-outlinable.
10595 for (const MachineOperand &MOP : MI.operands()) {
10596 // A check preventing CFI indices was here before, but only CFI
10597 // instructions should have those.
10598 assert(!MOP.isCFIIndex());
10599
10600 // If it uses LR or W30 explicitly, then don't touch it.
10601 if (MOP.isReg() && !MOP.isImplicit() &&
10602 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10603 return outliner::InstrType::Illegal;
10604 }
10605
10606 // Special cases for instructions that can always be outlined, but will fail
10607 // the later tests, e.g. ADRPs, which are PC-relative and use LR, but can
10608 // always be outlined because they don't require a *specific* value in LR.
10609 if (MI.getOpcode() == AArch64::ADRP)
10610 return outliner::InstrType::Legal;
10611
10612 // If MI is a call we might be able to outline it. We don't want to outline
10613 // any calls that rely on the position of items on the stack. When we outline
10614 // something containing a call, we have to emit a save and restore of LR in
10615 // the outlined function. Currently, this always happens by saving LR to the
10616 // stack. Thus, if we outline, say, half the parameters for a function call
10617 // plus the call, then we'll break the callee's expectations for the layout
10618 // of the stack.
10619 //
10620 // FIXME: Allow calls to functions which construct a stack frame, as long
10621 // as they don't access arguments on the stack.
10622 // FIXME: Figure out some way to analyze functions defined in other modules.
10623 // We should be able to compute the memory usage based on the IR calling
10624 // convention, even if we can't see the definition.
10625 if (MI.isCall()) {
10626 // Get the function associated with the call. Look at each operand and find
10627 // the one that represents the callee and get its name.
10628 const Function *Callee = nullptr;
10629 for (const MachineOperand &MOP : MI.operands()) {
10630 if (MOP.isGlobal()) {
10631 Callee = dyn_cast<Function>(MOP.getGlobal());
10632 break;
10633 }
10634 }
10635
10636 // Never outline calls to mcount. There isn't any rule that would require
10637 // this, but the Linux kernel's "ftrace" feature depends on it.
10638 if (Callee && Callee->getName() == "\01_mcount")
10639 return outliner::InstrType::Illegal;
10640
10641 // If we don't know anything about the callee, assume it depends on the
10642 // stack layout of the caller. In that case, it's only legal to outline
10643 // as a tail-call. Explicitly list the call instructions we know about so we
10644 // don't get unexpected results with call pseudo-instructions.
10645 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10646 if (MI.getOpcode() == AArch64::BLR ||
10647 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10648 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10649
10650 if (!Callee)
10651 return UnknownCallOutlineType;
10652
10653 // We have a function we have information about. Check if it's something we
10654 // can safely outline.
10655 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10656
10657 // We don't know what's going on with the callee at all. Don't touch it.
10658 if (!CalleeMF)
10659 return UnknownCallOutlineType;
10660
10661 // Check if we know anything about the callee saves on the function. If we
10662 // don't, then don't touch it, since that implies that we haven't
10663 // computed anything about its stack frame yet.
10664 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10665 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10666 MFI.getNumObjects() > 0)
10667 return UnknownCallOutlineType;
10668
10669 // At this point, we can say that CalleeMF ought to not pass anything on the
10670 // stack. Therefore, we can outline it.
10671 return outliner::InstrType::Legal;
10672 }
10673
10674 // Don't touch the link register or W30.
10675 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10676 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10677 return outliner::InstrType::Illegal;
10678
10679 // Don't outline BTI instructions, because that will prevent the outlining
10680 // site from being indirectly callable.
10681 if (hasBTISemantics(MI))
10682 return outliner::InstrType::Illegal;
10683
10684 return outliner::InstrType::Legal;
10685}
10686
10687void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10688 for (MachineInstr &MI : MBB) {
10689 const MachineOperand *Base;
10690 TypeSize Width(0, false);
10691 int64_t Offset;
10692 bool OffsetIsScalable;
10693
10694 // Is this a load or store with an immediate offset with SP as the base?
10695 if (!MI.mayLoadOrStore() ||
10696 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10697 &RI) ||
10698 (Base->isReg() && Base->getReg() != AArch64::SP))
10699 continue;
10700
10701 // It is, so we have to fix it up.
10702 TypeSize Scale(0U, false);
10703 int64_t Dummy1, Dummy2;
10704
10705 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10706 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10707 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10708 assert(Scale != 0 && "Unexpected opcode!");
10709 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10710
10711 // We've pushed the return address to the stack, so add 16 to the offset.
10712 // This is safe, since we already checked if it would overflow when we
10713 // checked if this instruction was legal to outline.
10714 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10715 StackOffsetOperand.setImm(NewImm);
10716 }
10717}
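// Editorial sketch (illustrative only, not part of the upstream source): the
// offset rewrite above in isolation. ByteOffset models the SP-relative byte
// offset reported by getMemOperandWithOffsetWidth and Scale the byte scale
// from getMemOpInfo; 16 is the LR spill slot the outliner pushes.
static int64_t exampleOutlinedSPImm(int64_t ByteOffset, int64_t Scale) {
  // E.g. an LDRXui at [sp, #24] (Scale = 8) becomes immediate (24 + 16) / 8
  // = 5, i.e. an access at [sp, #40] once LR has been pushed.
  return (ByteOffset + 16) / Scale;
}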
10718
10719static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10720 const AArch64InstrInfo *TII,
10721 bool ShouldSignReturnAddr) {
10722 if (!ShouldSignReturnAddr)
10723 return;
10724
10725 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10726 .setMIFlag(MachineInstr::FrameSetup);
10727 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10728 TII->get(AArch64::PAUTH_EPILOGUE))
10729 .setMIFlag(MachineInstr::FrameDestroy);
10730}
10731
10732void AArch64InstrInfo::buildOutlinedFrame(
10733 MachineBasicBlock &MBB, MachineFunction &MF,
10734 const outliner::OutlinedFunction &OF) const {
10735
10736 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10737
10738 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10739 FI->setOutliningStyle("Tail Call");
10740 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10741 // For thunk outlining, rewrite the last instruction from a call to a
10742 // tail-call.
10743 MachineInstr *Call = &*--MBB.instr_end();
10744 unsigned TailOpcode;
10745 if (Call->getOpcode() == AArch64::BL) {
10746 TailOpcode = AArch64::TCRETURNdi;
10747 } else {
10748 assert(Call->getOpcode() == AArch64::BLR ||
10749 Call->getOpcode() == AArch64::BLRNoIP);
10750 TailOpcode = AArch64::TCRETURNriALL;
10751 }
10752 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10753 .add(Call->getOperand(0))
10754 .addImm(0);
10755 MBB.insert(MBB.end(), TC);
10756 Call->eraseFromParent();
10757
10758 FI->setOutliningStyle("Thunk");
10759 }
10760
10761 bool IsLeafFunction = true;
10762
10763 // Is there a call in the outlined range?
10764 auto IsNonTailCall = [](const MachineInstr &MI) {
10765 return MI.isCall() && !MI.isReturn();
10766 };
10767
10768 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10769 // Fix up the instructions in the range, since we're going to modify the
10770 // stack.
10771
10772 // Bugzilla ID: 46767
10773 // TODO: Check if fixing up twice is safe so we can outline these.
10774 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10775 "Can only fix up stack references once");
10776 fixupPostOutline(MBB);
10777
10778 IsLeafFunction = false;
10779
10780 // LR has to be a live in so that we can save it.
10781 if (!MBB.isLiveIn(AArch64::LR))
10782 MBB.addLiveIn(AArch64::LR);
10783
10786
10787 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10788 OF.FrameConstructionID == MachineOutlinerThunk)
10789 Et = std::prev(MBB.end());
10790
10791 // Insert a save before the outlined region
10792 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10793 .addReg(AArch64::SP, RegState::Define)
10794 .addReg(AArch64::LR)
10795 .addReg(AArch64::SP)
10796 .addImm(-16);
10797 It = MBB.insert(It, STRXpre);
10798
10799 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10800 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10801
10802 // Add a CFI saying the stack was moved 16 B down.
10803 CFIBuilder.buildDefCFAOffset(16);
10804
10805 // Add a CFI saying that the LR that we want to find is now 16 B higher
10806 // than before.
10807 CFIBuilder.buildOffset(AArch64::LR, -16);
10808 }
10809
10810 // Insert a restore before the terminator for the function.
10811 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10812 .addReg(AArch64::SP, RegState::Define)
10813 .addReg(AArch64::LR, RegState::Define)
10814 .addReg(AArch64::SP)
10815 .addImm(16);
10816 Et = MBB.insert(Et, LDRXpost);
10817 }
10818
10819 auto RASignCondition = FI->getSignReturnAddressCondition();
10820 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
10821 RASignCondition, !IsLeafFunction);
10822
10823 // If this is a tail call outlined function, then there's already a return.
10824 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10825 OF.FrameConstructionID == MachineOutlinerThunk) {
10826 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10827 return;
10828 }
10829
10830 // It's not a tail call, so we have to insert the return ourselves.
10831
10832 // LR has to be a live in so that we can return to it.
10833 if (!MBB.isLiveIn(AArch64::LR))
10834 MBB.addLiveIn(AArch64::LR);
10835
10836 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10837 .addReg(AArch64::LR);
10838 MBB.insert(MBB.end(), ret);
10839
10840 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10841
10842 FI->setOutliningStyle("Function");
10843
10844 // Did we have to modify the stack by saving the link register?
10845 if (OF.FrameConstructionID != MachineOutlinerDefault)
10846 return;
10847
10848 // We modified the stack.
10849 // Walk over the basic block and fix up all the stack accesses.
10850 fixupPostOutline(MBB);
10851}
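// Editorial note (illustrative only, not part of the upstream source): for a
// MachineOutlinerDefault frame the code above builds roughly
//   str x30, [sp, #-16]!   // STRXpre: spill LR, CFA offset becomes 16
//   ...outlined body, SP-relative offsets already bumped by 16...
//   ldr x30, [sp], #16     // LDRXpost: reload LR
//   ret
// plus PAUTH_PROLOGUE/PAUTH_EPILOGUE markers when the return address must be
// signed; tail-call and thunk frames instead end in the rewritten tail call.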
10852
10853MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10854 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10855 MachineFunction &MF, outliner::Candidate &C) const {
10856
10857 // Are we tail calling?
10858 if (C.CallConstructionID == MachineOutlinerTailCall) {
10859 // If yes, then we can just branch to the label.
10860 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10861 .addGlobalAddress(M.getNamedValue(MF.getName()))
10862 .addImm(0));
10863 return It;
10864 }
10865
10866 // Are we saving the link register?
10867 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10868 C.CallConstructionID == MachineOutlinerThunk) {
10869 // No, so just insert the call.
10870 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10871 .addGlobalAddress(M.getNamedValue(MF.getName())));
10872 return It;
10873 }
10874
10875 // We want to return the spot where we inserted the call.
10876 MachineBasicBlock::iterator CallPt;
10877
10878 // Instructions for saving and restoring LR around the call instruction we're
10879 // going to insert.
10880 MachineInstr *Save;
10881 MachineInstr *Restore;
10882 // Can we save to a register?
10883 if (C.CallConstructionID == MachineOutlinerRegSave) {
10884 // FIXME: This logic should be sunk into a target-specific interface so that
10885 // we don't have to recompute the register.
10886 Register Reg = findRegisterToSaveLRTo(C);
10887 assert(Reg && "No callee-saved register available?");
10888
10889 // LR has to be a live in so that we can save it.
10890 if (!MBB.isLiveIn(AArch64::LR))
10891 MBB.addLiveIn(AArch64::LR);
10892
10893 // Save and restore LR from Reg.
10894 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10895 .addReg(AArch64::XZR)
10896 .addReg(AArch64::LR)
10897 .addImm(0);
10898 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10899 .addReg(AArch64::XZR)
10900 .addReg(Reg)
10901 .addImm(0);
10902 } else {
10903 // We have the default case. Save and restore from SP.
10904 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10905 .addReg(AArch64::SP, RegState::Define)
10906 .addReg(AArch64::LR)
10907 .addReg(AArch64::SP)
10908 .addImm(-16);
10909 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10910 .addReg(AArch64::SP, RegState::Define)
10911 .addReg(AArch64::LR, RegState::Define)
10912 .addReg(AArch64::SP)
10913 .addImm(16);
10914 }
10915
10916 It = MBB.insert(It, Save);
10917 It++;
10918
10919 // Insert the call.
10920 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10921 .addGlobalAddress(M.getNamedValue(MF.getName())));
10922 CallPt = It;
10923 It++;
10924
10925 It = MBB.insert(It, Restore);
10926 return CallPt;
10927}
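// Editorial note (illustrative only, not part of the upstream source): the
// call-site constructions above correspond roughly to
//   MachineOutlinerTailCall:         b   OUTLINED_FUNCTION_N
//   MachineOutlinerNoLRSave / Thunk: bl  OUTLINED_FUNCTION_N
//   MachineOutlinerRegSave:          mov xN, x30 ; bl ... ; mov x30, xN
//   MachineOutlinerDefault:          str x30, [sp, #-16]! ; bl ... ; ldr x30, [sp], #16
// where xN is the register chosen by findRegisterToSaveLRTo.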
10928
10929bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10930 MachineFunction &MF) const {
10931 return MF.getFunction().hasMinSize();
10932}
10933
10934void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10935 MachineBasicBlock::iterator Iter,
10936 DebugLoc &DL,
10937 bool AllowSideEffects) const {
10938 const MachineFunction &MF = *MBB.getParent();
10939 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10940 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10941
10942 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10943 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10944 } else if (STI.isSVEorStreamingSVEAvailable()) {
10945 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10946 .addImm(0)
10947 .addImm(0);
10948 } else if (STI.isNeonAvailable()) {
10949 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10950 .addImm(0);
10951 } else {
10952 // This is a streaming-compatible function without SVE. We don't have full
10953 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10954 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10955 assert(STI.hasNEON() && "Expected to have NEON.");
10956 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10957 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10958 }
10959}
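// Editorial note (illustrative only, not part of the upstream source): the
// zeroing choices above correspond to
//   GPR:                            movz Reg, #0
//   SVE / streaming SVE available:  dup  zN.d, #0
//   NEON available:                 movi vN.2d, #0
//   streaming-compatible, no SVE:   the FMOVD0 pseudo on the low 64-bit
//                                   D sub-register, since a full-width NEON
//                                   movi would be illegal there.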
10960
10961std::optional<DestSourcePair>
10962AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10963
10964 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
10965 // zero shift immediate are used as aliases for the mov instruction.
10966 if (((MI.getOpcode() == AArch64::ORRWrs &&
10967 MI.getOperand(1).getReg() == AArch64::WZR &&
10968 MI.getOperand(3).getImm() == 0x0) ||
10969 (MI.getOpcode() == AArch64::ORRWrr &&
10970 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10971 // Check that the w->w move is not a zero-extending w->x mov.
10972 (!MI.getOperand(0).getReg().isVirtual() ||
10973 MI.getOperand(0).getSubReg() == 0) &&
10974 (!MI.getOperand(0).getReg().isPhysical() ||
10975 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10976 /*TRI=*/nullptr) == -1))
10977 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10978
10979 if (MI.getOpcode() == AArch64::ORRXrs &&
10980 MI.getOperand(1).getReg() == AArch64::XZR &&
10981 MI.getOperand(3).getImm() == 0x0)
10982 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10983
10984 return std::nullopt;
10985}
10986
10987std::optional<DestSourcePair>
10988AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10989 if ((MI.getOpcode() == AArch64::ORRWrs &&
10990 MI.getOperand(1).getReg() == AArch64::WZR &&
10991 MI.getOperand(3).getImm() == 0x0) ||
10992 (MI.getOpcode() == AArch64::ORRWrr &&
10993 MI.getOperand(1).getReg() == AArch64::WZR))
10994 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10995 return std::nullopt;
10996}
10997
10998std::optional<RegImmPair>
10999AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
11000 int Sign = 1;
11001 int64_t Offset = 0;
11002
11003 // TODO: Handle cases where Reg is a super- or sub-register of the
11004 // destination register.
11005 const MachineOperand &Op0 = MI.getOperand(0);
11006 if (!Op0.isReg() || Reg != Op0.getReg())
11007 return std::nullopt;
11008
11009 switch (MI.getOpcode()) {
11010 default:
11011 return std::nullopt;
11012 case AArch64::SUBWri:
11013 case AArch64::SUBXri:
11014 case AArch64::SUBSWri:
11015 case AArch64::SUBSXri:
11016 Sign *= -1;
11017 [[fallthrough]];
11018 case AArch64::ADDSWri:
11019 case AArch64::ADDSXri:
11020 case AArch64::ADDWri:
11021 case AArch64::ADDXri: {
11022 // TODO: The third operand can be a global address (usually some string).
11023 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
11024 !MI.getOperand(2).isImm())
11025 return std::nullopt;
11026 int Shift = MI.getOperand(3).getImm();
11027 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
11028 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
11029 }
11030 }
11031 return RegImmPair{MI.getOperand(1).getReg(), Offset};
11032}
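// Editorial note (illustrative only, not part of the upstream source): querying
// "ADDXri x0, x1, #8" for x0 yields RegImmPair{x1, 8}, while
// "SUBXri x0, x1, #4, lsl #12" yields RegImmPair{x1, -16384}, since the shifted
// immediate is negated for the SUB forms.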
11033
11034/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
11035/// the destination register then, if possible, describe the value in terms of
11036/// the source register.
11037static std::optional<ParamLoadedValue>
11038describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
11039 const TargetInstrInfo *TII,
11040 const TargetRegisterInfo *TRI) {
11041 auto DestSrc = TII->isCopyLikeInstr(MI);
11042 if (!DestSrc)
11043 return std::nullopt;
11044
11045 Register DestReg = DestSrc->Destination->getReg();
11046 Register SrcReg = DestSrc->Source->getReg();
11047
11048 if (!DestReg.isValid() || !SrcReg.isValid())
11049 return std::nullopt;
11050
11051 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
11052
11053 // If the described register is the destination, just return the source.
11054 if (DestReg == DescribedReg)
11055 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11056
11057 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
11058 if (MI.getOpcode() == AArch64::ORRWrs &&
11059 TRI->isSuperRegister(DestReg, DescribedReg))
11060 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11061
11062 // We may need to describe the lower part of a ORRXrs move.
11063 if (MI.getOpcode() == AArch64::ORRXrs &&
11064 TRI->isSubRegister(DestReg, DescribedReg)) {
11065 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
11066 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
11067 }
11068
11069 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
11070 "Unhandled ORR[XW]rs copy case");
11071
11072 return std::nullopt;
11073}
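// Editorial note (illustrative only, not part of the upstream source): for
// "ORRWrs w0, wzr, w1, #0", asking about w0 or its super-register x0 is
// answered with w1 (the 32-bit move implicitly zero-extends); for
// "ORRXrs x0, xzr, x1, #0", asking about the sub-register w0 is answered with
// w1 via the sub_32 sub-register of the source.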
11074
11075bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
11076 // Functions cannot be split to different sections on AArch64 if they have
11077 // a red zone. This is because relaxing a cross-section branch may require
11078 // incrementing the stack pointer to spill a register, which would overwrite
11079 // the red zone.
11080 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
11081 return false;
11082
11083 return TargetInstrInfo::isFunctionSafeToSplit(MF);
11084}
11085
11086bool AArch64InstrInfo::isMBBSafeToSplitToCold(
11087 const MachineBasicBlock &MBB) const {
11088 // Asm Goto blocks can contain conditional branches to goto labels, which can
11089 // get moved out of range of the branch instruction.
11090 auto isAsmGoto = [](const MachineInstr &MI) {
11091 return MI.getOpcode() == AArch64::INLINEASM_BR;
11092 };
11093 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
11094 return false;
11095
11096 // Because jump tables are label-relative instead of table-relative, they all
11097 // must be in the same section or relocation fixup handling will fail.
11098
11099 // Check if MBB is a jump table target
11100 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
11101 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
11102 return llvm::is_contained(JTE.MBBs, &MBB);
11103 };
11104 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
11105 return false;
11106
11107 // Check if MBB contains a jump table lookup
11108 for (const MachineInstr &MI : MBB) {
11109 switch (MI.getOpcode()) {
11110 case TargetOpcode::G_BRJT:
11111 case AArch64::JumpTableDest32:
11112 case AArch64::JumpTableDest16:
11113 case AArch64::JumpTableDest8:
11114 return false;
11115 default:
11116 continue;
11117 }
11118 }
11119
11120 // MBB isn't a special case, so it's safe to be split to the cold section.
11121 return true;
11122}
11123
11124std::optional<ParamLoadedValue>
11125AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11126 Register Reg) const {
11127 const MachineFunction *MF = MI.getMF();
11128 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11129 switch (MI.getOpcode()) {
11130 case AArch64::MOVZWi:
11131 case AArch64::MOVZXi: {
11132 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11133 // 64-bit parameters, so we need to consider super-registers.
11134 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11135 return std::nullopt;
11136
11137 if (!MI.getOperand(1).isImm())
11138 return std::nullopt;
11139 int64_t Immediate = MI.getOperand(1).getImm();
11140 int Shift = MI.getOperand(2).getImm();
11141 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11142 nullptr);
11143 }
11144 case AArch64::ORRWrs:
11145 case AArch64::ORRXrs:
11146 return describeORRLoadedValue(MI, Reg, this, TRI);
11147 }
11148
11149 return TargetInstrInfo::describeLoadedValue(MI, Reg);
11150}
11151
11152bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11153 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11154 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11155 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11156 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11157
11158 // Anyexts are nops.
11159 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11160 return true;
11161
11162 Register DefReg = ExtMI.getOperand(0).getReg();
11163 if (!MRI.hasOneNonDBGUse(DefReg))
11164 return false;
11165
11166 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11167 // addressing mode.
11168 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11169 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11170}
11171
11172uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11173 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11174}
11175
11176bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11177 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11178}
11179
11180bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11181 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11182}
11183
11184unsigned int
11185AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11186 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11187}
11188
11189bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11190 unsigned Scale) const {
11191 if (Offset && Scale)
11192 return false;
11193
11194 // Check Reg + Imm
11195 if (!Scale) {
11196 // 9-bit signed offset
11197 if (isInt<9>(Offset))
11198 return true;
11199
11200 // 12-bit unsigned offset
11201 unsigned Shift = Log2_64(NumBytes);
11202 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11203 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11204 (Offset >> Shift) << Shift == Offset)
11205 return true;
11206 return false;
11207 }
11208
11209 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11210 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11211}
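// Editorial note (illustrative only, not part of the upstream source): for an
// 8-byte access, Offset = -256 and Offset = 32760 are accepted with Scale = 0
// (signed 9-bit and scaled unsigned 12-bit forms respectively), Offset = 257 is
// rejected (outside the 9-bit range and not a multiple of 8), and the
// register-offset forms require Scale to be 1 or 8. Offset and Scale cannot
// both be non-zero.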
11212
11213unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
11214 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11215 return AArch64::BLRNoIP;
11216 else
11217 return AArch64::BLR;
11218}
11219
11220MachineBasicBlock::iterator
11221AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
11222 Register TargetReg, bool FrameSetup) const {
11223 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11224
11225 MachineBasicBlock &MBB = *MBBI->getParent();
11226 MachineFunction &MF = *MBB.getParent();
11227 const AArch64InstrInfo *TII =
11228 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11229 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11230 DebugLoc DL = MBB.findDebugLoc(MBBI);
11231
11232 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11233 MachineBasicBlock *LoopTestMBB =
11234 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11235 MF.insert(MBBInsertPoint, LoopTestMBB);
11236 MachineBasicBlock *LoopBodyMBB =
11237 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11238 MF.insert(MBBInsertPoint, LoopBodyMBB);
11239 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11240 MF.insert(MBBInsertPoint, ExitMBB);
11241 MachineInstr::MIFlag Flags =
11242 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
11243
11244 // LoopTest:
11245 // SUB SP, SP, #ProbeSize
11246 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11247 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11248
11249 // CMP SP, TargetReg
11250 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11251 AArch64::XZR)
11252 .addReg(AArch64::SP)
11253 .addReg(TargetReg)
11254 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
11255 .setMIFlags(Flags);
11256
11257 // B.<Cond> LoopExit
11258 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
11259 .addImm(AArch64CC::LE)
11260 .addMBB(ExitMBB)
11261 .setMIFlags(Flags);
11262
11263 // STR XZR, [SP]
11264 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
11265 .addReg(AArch64::XZR)
11266 .addReg(AArch64::SP)
11267 .addImm(0)
11268 .setMIFlags(Flags);
11269
11270 // B loop
11271 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11272 .addMBB(LoopTestMBB)
11273 .setMIFlags(Flags);
11274
11275 // LoopExit:
11276 // MOV SP, TargetReg
11277 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11278 .addReg(TargetReg)
11279 .addImm(0)
11280 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
11281 .setMIFlags(Flags);
11282
11283 // LDR XZR, [SP]
11284 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11285 .addReg(AArch64::XZR, RegState::Define)
11286 .addReg(AArch64::SP)
11287 .addImm(0)
11288 .setMIFlags(Flags);
11289
11290 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
11291 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
11292
11293 LoopTestMBB->addSuccessor(ExitMBB);
11294 LoopTestMBB->addSuccessor(LoopBodyMBB);
11295 LoopBodyMBB->addSuccessor(LoopTestMBB);
11296 MBB.addSuccessor(LoopTestMBB);
11297
11298 // Update liveins.
11299 if (MF.getRegInfo().reservedRegsFrozen())
11300 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11301
11302 return ExitMBB->begin();
11303}
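// Editorial note (illustrative only, not part of the upstream source): with
// TargetReg = x9 the blocks built above expand roughly to
//   .Lloop:
//     sub  sp, sp, #ProbeSize
//     cmp  sp, x9              // subs xzr, sp, x9, uxtx
//     b.le .Lexit
//     str  xzr, [sp]           // probe the page just allocated
//     b    .Lloop
//   .Lexit:
//     mov  sp, x9              // add sp, x9, #0
//     ldr  xzr, [sp]           // probe the final stack pointer
// with liveins recomputed afterwards if the reserved registers are frozen.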
11304
11305namespace {
11306class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
11307 MachineFunction *MF;
11308 const TargetInstrInfo *TII;
11309 const TargetRegisterInfo *TRI;
11310 MachineRegisterInfo &MRI;
11311
11312 /// The block of the loop
11313 MachineBasicBlock *LoopBB;
11314 /// The conditional branch of the loop
11315 MachineInstr *CondBranch;
11316 /// The compare instruction for loop control
11317 MachineInstr *Comp;
11318 /// The number of the operand of the loop counter value in Comp
11319 unsigned CompCounterOprNum;
11320 /// The instruction that updates the loop counter value
11321 MachineInstr *Update;
11322 /// The number of the operand of the loop counter value in Update
11323 unsigned UpdateCounterOprNum;
11324 /// The initial value of the loop counter
11325 Register Init;
11326 /// True iff Update is a predecessor of Comp
11327 bool IsUpdatePriorComp;
11328
11329 /// The normalized condition used by createTripCountGreaterCondition()
11330 SmallVector<MachineOperand, 4> Cond;
11331
11332public:
11333 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11334 MachineInstr *Comp, unsigned CompCounterOprNum,
11335 MachineInstr *Update, unsigned UpdateCounterOprNum,
11336 Register Init, bool IsUpdatePriorComp,
11337 const SmallVectorImpl<MachineOperand> &Cond)
11338 : MF(Comp->getParent()->getParent()),
11339 TII(MF->getSubtarget().getInstrInfo()),
11340 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11341 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11342 CompCounterOprNum(CompCounterOprNum), Update(Update),
11343 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11344 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11345
11346 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11347 // Make sure the instructions for loop control are placed in stage 0.
11348 // The predecessors of Comp are considered by the caller.
11349 return MI == Comp;
11350 }
11351
11352 std::optional<bool> createTripCountGreaterCondition(
11353 int TC, MachineBasicBlock &MBB,
11354 SmallVectorImpl<MachineOperand> &CondParam) override {
11355 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11356 // Cond is normalized for such use.
11357 // The predecessors of the branch are assumed to have already been inserted.
11358 CondParam = Cond;
11359 return {};
11360 }
11361
11362 void createRemainingIterationsGreaterCondition(
11363 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11364 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11365
11366 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11367
11368 void adjustTripCount(int TripCountAdjust) override {}
11369
11370 bool isMVEExpanderSupported() override { return true; }
11371};
11372} // namespace
11373
11374/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11375/// is replaced by ReplaceReg. The output register is newly created.
11376/// The other operands are unchanged from MI.
11377static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11378 Register ReplaceReg, MachineBasicBlock &MBB,
11379 MachineBasicBlock::iterator InsertTo) {
11380 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11381 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11382 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11383 Register Result = 0;
11384 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11385 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11386 Result = MRI.createVirtualRegister(
11387 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11388 NewMI->getOperand(I).setReg(Result);
11389 } else if (I == ReplaceOprNum) {
11390 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11391 NewMI->getOperand(I).setReg(ReplaceReg);
11392 }
11393 }
11394 MBB.insert(InsertTo, NewMI);
11395 return Result;
11396}
11397
11398void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11399 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11400 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11401 // Create and accumulate conditions for next TC iterations.
11402 // Example:
11403 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11404 // # iteration of the kernel
11405 //
11406 // # insert the following instructions
11407 // cond = CSINCXr 0, 0, C, implicit $nzcv
11408 // counter = ADDXri counter, 1 # clone from this->Update
11409 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11410 // cond = CSINCXr cond, cond, C, implicit $nzcv
11411 // ... (repeat TC times)
11412 // SUBSXri cond, 0, implicit-def $nzcv
11413
11414 assert(CondBranch->getOpcode() == AArch64::Bcc);
11415 // CondCode to exit the loop
11416 AArch64CC::CondCode CC =
11417 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11418 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11419 CC = AArch64CC::getInvertedCondCode(CC);
11420
11421 // Accumulate conditions to exit the loop
11422 Register AccCond = AArch64::XZR;
11423
11424 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11425 auto AccumulateCond = [&](Register CurCond,
11426 AArch64CC::CondCode CC) {
11427 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11428 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11429 .addReg(NewCond, RegState::Define)
11430 .addReg(CurCond)
11431 .addReg(CurCond)
11432 .addImm(AArch64CC::getInvertedCondCode(CC));
11433 return NewCond;
11434 };
11435
11436 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11437 // Update and Comp for I == 0 already exist in MBB
11438 // (MBB is an unrolled kernel).
11439 Register Counter;
11440 for (int I = 0; I <= TC; ++I) {
11441 Register NextCounter;
11442 if (I != 0)
11443 NextCounter =
11444 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11445
11446 AccCond = AccumulateCond(AccCond, CC);
11447
11448 if (I != TC) {
11449 if (I == 0) {
11450 if (Update != Comp && IsUpdatePriorComp) {
11451 Counter =
11452 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11453 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11454 MBB.end());
11455 } else {
11456 // We can reuse the already-computed value.
11457 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11458 }
11459 } else if (Update != Comp) {
11460 NextCounter =
11461 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11462 }
11463 }
11464 Counter = NextCounter;
11465 }
11466 } else {
11467 Register Counter;
11468 if (LastStage0Insts.empty()) {
11469 // Use the initial counter value (testing whether the trip count is
11470 // sufficient for the pipelined code to be executed at all).
11471 Counter = Init;
11472 if (IsUpdatePriorComp)
11473 Counter =
11474 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11475 } else {
11476 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11477 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11478 }
11479
11480 for (int I = 0; I <= TC; ++I) {
11481 Register NextCounter;
11482 NextCounter =
11483 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11484 AccCond = AccumulateCond(AccCond, CC);
11485 if (I != TC && Update != Comp)
11486 NextCounter =
11487 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11488 Counter = NextCounter;
11489 }
11490 }
11491
11492 // If AccCond == 0, the remainder is greater than TC.
11493 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11494 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11495 .addReg(AccCond)
11496 .addImm(0)
11497 .addImm(0);
11498 Cond.clear();
11499 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11500}
11501
11502static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11503 Register &RegMBB, Register &RegOther) {
11504 assert(Phi.getNumOperands() == 5);
11505 if (Phi.getOperand(2).getMBB() == MBB) {
11506 RegMBB = Phi.getOperand(1).getReg();
11507 RegOther = Phi.getOperand(3).getReg();
11508 } else {
11509 assert(Phi.getOperand(4).getMBB() == MBB);
11510 RegMBB = Phi.getOperand(3).getReg();
11511 RegOther = Phi.getOperand(1).getReg();
11512 }
11513}
11514
11515static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11516 if (!Reg.isVirtual())
11517 return false;
11518 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11519 return MRI.getVRegDef(Reg)->getParent() != BB;
11520}
11521
11522/// If Reg is an induction variable, return true and set some parameters
11523static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11524 MachineInstr *&UpdateInst,
11525 unsigned &UpdateCounterOprNum, Register &InitReg,
11526 bool &IsUpdatePriorComp) {
11527 // Example:
11528 //
11529 // Preheader:
11530 // InitReg = ...
11531 // LoopBB:
11532 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11533 // Reg = COPY Reg0 ; COPY is ignored.
11534 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11535 // ; Reg is the value calculated in the previous
11536 // ; iteration, so IsUpdatePriorComp == false.
11537
11538 if (LoopBB->pred_size() != 2)
11539 return false;
11540 if (!Reg.isVirtual())
11541 return false;
11542 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11543 UpdateInst = nullptr;
11544 UpdateCounterOprNum = 0;
11545 InitReg = 0;
11546 IsUpdatePriorComp = true;
11547 Register CurReg = Reg;
11548 while (true) {
11549 MachineInstr *Def = MRI.getVRegDef(CurReg);
11550 if (Def->getParent() != LoopBB)
11551 return false;
11552 if (Def->isCopy()) {
11553 // Ignore copy instructions unless they contain subregisters
11554 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11555 return false;
11556 CurReg = Def->getOperand(1).getReg();
11557 } else if (Def->isPHI()) {
11558 if (InitReg != 0)
11559 return false;
11560 if (!UpdateInst)
11561 IsUpdatePriorComp = false;
11562 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11563 } else {
11564 if (UpdateInst)
11565 return false;
11566 switch (Def->getOpcode()) {
11567 case AArch64::ADDSXri:
11568 case AArch64::ADDSWri:
11569 case AArch64::SUBSXri:
11570 case AArch64::SUBSWri:
11571 case AArch64::ADDXri:
11572 case AArch64::ADDWri:
11573 case AArch64::SUBXri:
11574 case AArch64::SUBWri:
11575 UpdateInst = Def;
11576 UpdateCounterOprNum = 1;
11577 break;
11578 case AArch64::ADDSXrr:
11579 case AArch64::ADDSWrr:
11580 case AArch64::SUBSXrr:
11581 case AArch64::SUBSWrr:
11582 case AArch64::ADDXrr:
11583 case AArch64::ADDWrr:
11584 case AArch64::SUBXrr:
11585 case AArch64::SUBWrr:
11586 UpdateInst = Def;
11587 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11588 UpdateCounterOprNum = 1;
11589 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11590 UpdateCounterOprNum = 2;
11591 else
11592 return false;
11593 break;
11594 default:
11595 return false;
11596 }
11597 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11598 }
11599
11600 if (!CurReg.isVirtual())
11601 return false;
11602 if (Reg == CurReg)
11603 break;
11604 }
11605
11606 if (!UpdateInst)
11607 return false;
11608
11609 return true;
11610}
11611
11612std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11613AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11614 // Accept loops that meet the following conditions
11615 // * The conditional branch is BCC
11616 // * The compare instruction is ADDS/SUBS/WHILEXX
11617 // * One operand of the compare is an induction variable and the other is a
11618 // loop invariant value
11619 // * The induction variable is incremented/decremented by a single instruction
11620 // * Does not contain CALL or instructions which have unmodeled side effects
11621
11622 for (MachineInstr &MI : *LoopBB)
11623 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11624 // This instruction may use NZCV, which interferes with the instruction to
11625 // be inserted for loop control.
11626 return nullptr;
11627
11628 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11629 SmallVector<MachineOperand, 4> Cond;
11630 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11631 return nullptr;
11632
11633 // Infinite loops are not supported
11634 if (TBB == LoopBB && FBB == LoopBB)
11635 return nullptr;
11636
11637 // Must be conditional branch
11638 if (TBB != LoopBB && FBB == nullptr)
11639 return nullptr;
11640
11641 assert((TBB == LoopBB || FBB == LoopBB) &&
11642 "The Loop must be a single-basic-block loop");
11643
11644 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11645 const TargetRegisterInfo &TRI = getRegisterInfo();
11646
11647 if (CondBranch->getOpcode() != AArch64::Bcc)
11648 return nullptr;
11649
11650 // Normalization for createTripCountGreaterCondition()
11651 if (TBB == LoopBB)
11652 reverseBranchCondition(Cond);
11653
11654 MachineInstr *Comp = nullptr;
11655 unsigned CompCounterOprNum = 0;
11656 for (MachineInstr &MI : reverse(*LoopBB)) {
11657 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11658 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11659 // operands is a loop invariant value
11660
11661 switch (MI.getOpcode()) {
11662 case AArch64::SUBSXri:
11663 case AArch64::SUBSWri:
11664 case AArch64::ADDSXri:
11665 case AArch64::ADDSWri:
11666 Comp = &MI;
11667 CompCounterOprNum = 1;
11668 break;
11669 case AArch64::ADDSWrr:
11670 case AArch64::ADDSXrr:
11671 case AArch64::SUBSWrr:
11672 case AArch64::SUBSXrr:
11673 Comp = &MI;
11674 break;
11675 default:
11676 if (isWhileOpcode(MI.getOpcode())) {
11677 Comp = &MI;
11678 break;
11679 }
11680 return nullptr;
11681 }
11682
11683 if (CompCounterOprNum == 0) {
11684 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11685 CompCounterOprNum = 2;
11686 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11687 CompCounterOprNum = 1;
11688 else
11689 return nullptr;
11690 }
11691 break;
11692 }
11693 }
11694 if (!Comp)
11695 return nullptr;
11696
11697 MachineInstr *Update = nullptr;
11698 Register Init;
11699 bool IsUpdatePriorComp;
11700 unsigned UpdateCounterOprNum;
11701 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11702 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11703 return nullptr;
11704
11705 return std::make_unique<AArch64PipelinerLoopInfo>(
11706 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11707 Init, IsUpdatePriorComp, Cond);
11708}
11709
11710/// verifyInstruction - Perform target specific instruction verification.
11711bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11712 StringRef &ErrInfo) const {
11713 // Verify that immediate offsets on load/store instructions are within range.
11714 // Stack objects with an FI operand are excluded as they can be fixed up
11715 // during PEI.
11716 TypeSize Scale(0U, false), Width(0U, false);
11717 int64_t MinOffset, MaxOffset;
11718 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11719 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11720 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11721 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11722 if (Imm < MinOffset || Imm > MaxOffset) {
11723 ErrInfo = "Unexpected immediate on load/store instruction";
11724 return false;
11725 }
11726 }
11727 }
11728
11729 const MCInstrDesc &MCID = MI.getDesc();
11730 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11731 const MachineOperand &MO = MI.getOperand(Op);
11732 switch (MCID.operands()[Op].OperandType) {
11733 case AArch64::OPERAND_IMPLICIT_IMM_0:
11734 if (!MO.isImm() || MO.getImm() != 0) {
11735 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11736 return false;
11737 }
11738 break;
11739 case AArch64::OPERAND_SHIFT_MSL:
11740 if (!MO.isImm() ||
11741 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11742 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11743 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11744 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11745 return false;
11746 }
11747 break;
11748 default:
11749 break;
11750 }
11751 }
11752 return true;
11753}
11754
11755#define GET_INSTRINFO_HELPERS
11756#define GET_INSTRMAP_INFO
11757#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if in a streaming call site region without SME-FA64.
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool isInStreamingCallSiteRegion(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if the instruction at I is in a streaming call site region, within a single basic block.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
MachineInstr unsigned OpIdx
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
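A hedged sketch of querying these constraints for a scaled 64-bit load; the expected values are assumptions based on the LDRXui encoding (8-byte scale, unsigned 12-bit scaled immediate):
  TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  int64_t MinOffset = 0, MaxOffset = 0;
  if (AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOffset, MaxOffset)) {
    // Typically Scale == 8, Width == 8, MinOffset == 0, MaxOffset == 4095.
  }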
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to another register, return the destination and source registers as machine operands.
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscaled variant available.
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that sets the zero bit in the flags register.
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2, and the value it compares against in CmpValue.
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:123
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the target.
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:576
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:618
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:591
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:688
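A minimal sketch of how these CFI factories are typically used together with addFrameInst and addCFIIndex (assuming MF, MBB, MBBI, DL and TII are in scope, as they are in the frame-lowering helpers above):
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  unsigned DwarfSP = TRI.getDwarfRegNum(AArch64::SP, /*isEH=*/true);
  unsigned CFIIndex =
      MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, DwarfSP, 16));
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlags(MachineInstr::FrameSetup);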
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
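A brief sketch (assuming MF and a frame index FI are in scope) of how the stack-slot helpers in this file describe the memory they touch:
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI),
      MFI.getObjectAlign(FI));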
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved registers stays constant.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
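A short sketch of how the two components are combined and read back:
  StackOffset Off = StackOffset::getFixed(16) + StackOffset::getScalable(32);
  int64_t FixedPart = Off.getFixed();       // 16 bytes
  int64_t ScalablePart = Off.getScalable(); // 32 x vscale bytes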
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could replace the original code sequence.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:233
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
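A brief sketch of the fixed/scalable distinction TypeSize carries:
  TypeSize FixedTS = TypeSize::getFixed(16);         // exactly 16 bytes
  TypeSize ScalableTS = TypeSize::getScalable(16);   // 16 x vscale bytes
  bool IsScalable = ScalableTS.isScalable();         // true
  uint64_t MinBytes = ScalableTS.getKnownMinValue(); // 16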
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import stub.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the ".refptr.FOO" symbol.
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) into the integer value it represents with regSize bits.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
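A hedged sketch of these addressing-mode helpers, assuming a 64-bit register width and an immediate that is actually encodable as a logical immediate:
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0xFF, 64);
  uint64_t Val = AArch64_AM::decodeLogicalImmediate(Enc, 64); // 0xFF again
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  unsigned Amount = AArch64_AM::getShiftValue(Shifter);       // 12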
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
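A sketch of asking the expander how a constant would be materialized; ImmInsnModel is the per-instruction record it fills in (its fields are not relied on here):
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(0x123456789ABCDEF0ULL, 64, Insn);
  // Insn.size() is the number of real instructions needed for the immediate.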
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
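A minimal sketch of the builder interface used throughout this file (MBB, MBBI, DL and TII assumed in scope): materialize X0 = X1 + #16.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X0)
      .addReg(AArch64::X1)
      .addImm(16)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));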
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
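These helpers are what the branch-range and frame-offset checks above lean on; a tiny self-contained sketch:
  static_assert(llvm::isInt<9>(255) && !llvm::isInt<9>(256),
                "9-bit signed range is [-256, 255]");
  bool Fits = llvm::isIntN(21, -1048576); // true: -2^20 fits in 21 signed bits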
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2530
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
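A hedged sketch of a typical call (MBB, MBBI, DL and TII assumed in scope): drop SP by 64 fixed bytes plus 16 x vscale bytes during frame setup.
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::get(-64, -16), TII,
                  MachineInstr::FrameSetup);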
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to calling C.erase(remove_if(C, pred), C.end()).
Definition STLExtras.h:2168
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
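A small self-contained sketch of the templated form:
  static_assert(llvm::SignExtend64<9>(0x100) == -256, "sign bit set");
  static_assert(llvm::SignExtend64<9>(0x0FF) == 255, "positive value unchanged");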
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers NZCV.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
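For example, iterating operand indices 0 through 3:
  for (unsigned Idx : llvm::seq<unsigned>(0, 4)) {
    // visits 0, 1, 2, 3
    (void)Idx;
  }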
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.