AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/GlobalValue.h"
46#include "llvm/IR/Module.h"
47#include "llvm/MC/MCAsmInfo.h"
48#include "llvm/MC/MCInst.h"
50#include "llvm/MC/MCInstrDesc.h"
55#include "llvm/Support/LEB128.h"
59#include <cassert>
60#include <cstdint>
61#include <iterator>
62#include <utility>
63
64using namespace llvm;
65
66#define GET_INSTRINFO_CTOR_DTOR
67#include "AArch64GenInstrInfo.inc"
68
69#define DEBUG_TYPE "AArch64InstrInfo"
70
71STATISTIC(NumCopyInstrs, "Number of COPY instructions expanded");
72STATISTIC(NumZCRegMoveInstrsGPR, "Number of zero-cycle GPR register move "
73 "instructions expanded from canonical COPY");
74STATISTIC(NumZCRegMoveInstrsFPR, "Number of zero-cycle FPR register move "
75 "instructions expanded from canonical COPY");
76STATISTIC(NumZCZeroingInstrsGPR, "Number of zero-cycle GPR zeroing "
77 "instructions expanded from canonical COPY");
78// NumZCZeroingInstrsFPR is counted at AArch64AsmPrinter
79
80static cl::opt<unsigned>
81 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
82 cl::desc("Restrict range of CB instructions (DEBUG)"));
83
85 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
86 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
87
89 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
90 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
91
92static cl::opt<unsigned>
93 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
94 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
95
96static cl::opt<unsigned>
97 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
98 cl::desc("Restrict range of B instructions (DEBUG)"));
99
101 "aarch64-search-limit", cl::Hidden, cl::init(2048),
102 cl::desc("Restrict range of instructions to search for the "
103 "machine-combiner gather pattern optimization"));
104
105AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
106 : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
107 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
108 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
109
110/// GetInstSize - Return the number of bytes of code the specified
111/// instruction may be. This returns the maximum number of bytes.
112unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
113 const MachineBasicBlock &MBB = *MI.getParent();
114 const MachineFunction *MF = MBB.getParent();
115 const Function &F = MF->getFunction();
116 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
117
118 {
119 auto Op = MI.getOpcode();
120 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
121 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
122 }
123
124 // Meta-instructions emit no code.
125 if (MI.isMetaInstruction())
126 return 0;
127
128 // FIXME: We currently only handle pseudoinstructions that don't get expanded
129 // before the assembly printer.
130 unsigned NumBytes = 0;
131 const MCInstrDesc &Desc = MI.getDesc();
132
133 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
134 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
135
136 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
137 if (!MFI->shouldSignReturnAddress(*MF))
138 return NumBytes;
139
140 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
141 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
142 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
143 return NumBytes;
144 }
145
146 // The size should preferably be set in
147 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
148 // Specific cases below handle instructions of variable size.
149 switch (Desc.getOpcode()) {
150 default:
151 if (Desc.getSize())
152 return Desc.getSize();
153
154 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
155 // with fixed constant size but not specified in .td file) is a normal
156 // 4-byte insn.
157 NumBytes = 4;
158 break;
159 case TargetOpcode::STACKMAP:
160 // The upper bound for a stackmap intrinsic is the full length of its shadow
161 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
162 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
163 break;
164 case TargetOpcode::PATCHPOINT:
165 // The size of the patchpoint intrinsic is the number of bytes requested
166 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
167 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
168 break;
169 case TargetOpcode::STATEPOINT:
170 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
171 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
172 // No patch bytes means a normal call inst is emitted
173 if (NumBytes == 0)
174 NumBytes = 4;
175 break;
176 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
177 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
178 // instructions are expanded to the specified number of NOPs. Otherwise,
179 // they are expanded to 36-byte XRay sleds.
180 NumBytes =
181 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
182 break;
183 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
184 case TargetOpcode::PATCHABLE_TAIL_CALL:
185 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
186 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
187 NumBytes = 36;
188 break;
189 case TargetOpcode::PATCHABLE_EVENT_CALL:
190 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
191 NumBytes = 24;
192 break;
193
194 case AArch64::SPACE:
195 NumBytes = MI.getOperand(1).getImm();
196 break;
197 case TargetOpcode::BUNDLE:
198 NumBytes = getInstBundleLength(MI);
199 break;
200 }
201
202 return NumBytes;
203}
204
205unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
206 unsigned Size = 0;
207 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
208 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
209 while (++I != E && I->isInsideBundle()) {
210 assert(!I->isBundle() && "No nested bundle!");
211 Size += getInstSizeInBytes(*I);
212 }
213 return Size;
214}
215
216static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
217 SmallVectorImpl<MachineOperand> &Cond) {
218 // Block ends with fall-through condbranch.
219 switch (LastInst->getOpcode()) {
220 default:
221 llvm_unreachable("Unknown branch instruction?");
222 case AArch64::Bcc:
223 Target = LastInst->getOperand(1).getMBB();
224 Cond.push_back(LastInst->getOperand(0));
225 break;
226 case AArch64::CBZW:
227 case AArch64::CBZX:
228 case AArch64::CBNZW:
229 case AArch64::CBNZX:
230 Target = LastInst->getOperand(1).getMBB();
231 Cond.push_back(MachineOperand::CreateImm(-1));
232 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
233 Cond.push_back(LastInst->getOperand(0));
234 break;
235 case AArch64::TBZW:
236 case AArch64::TBZX:
237 case AArch64::TBNZW:
238 case AArch64::TBNZX:
239 Target = LastInst->getOperand(2).getMBB();
240 Cond.push_back(MachineOperand::CreateImm(-1));
241 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
242 Cond.push_back(LastInst->getOperand(0));
243 Cond.push_back(LastInst->getOperand(1));
244 break;
245 case AArch64::CBWPri:
246 case AArch64::CBXPri:
247 case AArch64::CBWPrr:
248 case AArch64::CBXPrr:
249 Target = LastInst->getOperand(3).getMBB();
250 Cond.push_back(MachineOperand::CreateImm(-1));
251 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
252 Cond.push_back(LastInst->getOperand(0));
253 Cond.push_back(LastInst->getOperand(1));
254 Cond.push_back(LastInst->getOperand(2));
255 break;
256 case AArch64::CBBAssertExt:
257 case AArch64::CBHAssertExt:
258 Target = LastInst->getOperand(3).getMBB();
259 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
260 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
261 Cond.push_back(LastInst->getOperand(0)); // Cond
262 Cond.push_back(LastInst->getOperand(1)); // Op0
263 Cond.push_back(LastInst->getOperand(2)); // Op1
264 Cond.push_back(LastInst->getOperand(4)); // Ext0
265 Cond.push_back(LastInst->getOperand(5)); // Ext1
266 break;
267 }
268}
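// Summary of the Cond encodings produced above (and consumed by
// instantiateCondBranch and insertSelect below), derived from the switch in
// parseCondBranch:
//   Bcc:                          { CC }
//   CBZ/CBNZ (W/X):               { -1, Opcode, Reg }
//   TBZ/TBNZ (W/X):               { -1, Opcode, Reg, BitNumber }
//   CBWPri/CBXPri/CBWPrr/CBXPrr:  { -1, Opcode, CC, Op0, Op1 }
//   CBBAssertExt/CBHAssertExt:    { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }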
269
270static unsigned getBranchDisplacementBits(unsigned Opc) {
271 switch (Opc) {
272 default:
273 llvm_unreachable("unexpected opcode!");
274 case AArch64::B:
275 return BDisplacementBits;
276 case AArch64::TBNZW:
277 case AArch64::TBZW:
278 case AArch64::TBNZX:
279 case AArch64::TBZX:
280 return TBZDisplacementBits;
281 case AArch64::CBNZW:
282 case AArch64::CBZW:
283 case AArch64::CBNZX:
284 case AArch64::CBZX:
285 return CBZDisplacementBits;
286 case AArch64::Bcc:
287 return BCCDisplacementBits;
288 case AArch64::CBWPri:
289 case AArch64::CBXPri:
290 case AArch64::CBBAssertExt:
291 case AArch64::CBHAssertExt:
292 case AArch64::CBWPrr:
293 case AArch64::CBXPrr:
294 return CBDisplacementBits;
295 }
296}
297
298bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
299 int64_t BrOffset) const {
300 unsigned Bits = getBranchDisplacementBits(BranchOp);
301 assert(Bits >= 3 && "max branch displacement must be enough to jump"
302 "over conditional branch expansion");
303 return isIntN(Bits, BrOffset / 4);
304}
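// For example, with the defaults above a Bcc has 19 displacement bits, so the
// byte offset must satisfy isInt<19>(BrOffset / 4), i.e. roughly +/-1 MiB;
// TB[N]Z (14 bits) reaches about +/-32 KiB and B (26 bits) about +/-128 MiB.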
305
306MachineBasicBlock *
307AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
308 switch (MI.getOpcode()) {
309 default:
310 llvm_unreachable("unexpected opcode!");
311 case AArch64::B:
312 return MI.getOperand(0).getMBB();
313 case AArch64::TBZW:
314 case AArch64::TBNZW:
315 case AArch64::TBZX:
316 case AArch64::TBNZX:
317 return MI.getOperand(2).getMBB();
318 case AArch64::CBZW:
319 case AArch64::CBNZW:
320 case AArch64::CBZX:
321 case AArch64::CBNZX:
322 case AArch64::Bcc:
323 return MI.getOperand(1).getMBB();
324 case AArch64::CBWPri:
325 case AArch64::CBXPri:
326 case AArch64::CBBAssertExt:
327 case AArch64::CBHAssertExt:
328 case AArch64::CBWPrr:
329 case AArch64::CBXPrr:
330 return MI.getOperand(3).getMBB();
331 }
332}
333
334void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
335 MachineBasicBlock &NewDestBB,
336 MachineBasicBlock &RestoreBB,
337 const DebugLoc &DL,
338 int64_t BrOffset,
339 RegScavenger *RS) const {
340 assert(RS && "RegScavenger required for long branching");
341 assert(MBB.empty() &&
342 "new block should be inserted for expanding unconditional branch");
343 assert(MBB.pred_size() == 1);
344 assert(RestoreBB.empty() &&
345 "restore block should be inserted for restoring clobbered registers");
346
347 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
348 // Offsets outside of the signed 33-bit range are not supported for ADRP +
349 // ADD.
350 if (!isInt<33>(BrOffset))
352 "Branch offsets outside of the signed 33-bit range not supported");
353
354 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
355 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
356 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
357 .addReg(Reg)
358 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
359 .addImm(0);
360 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
361 };
362
363 RS->enterBasicBlockEnd(MBB);
364 // If X16 is unused, we can rely on the linker to insert a range extension
365 // thunk if NewDestBB is out of range of a single B instruction.
366 constexpr Register Reg = AArch64::X16;
367 if (!RS->isRegUsed(Reg)) {
368 insertUnconditionalBranch(MBB, &NewDestBB, DL);
369 RS->setRegUsed(Reg);
370 return;
371 }
372
373 // If there's a free register and it's worth inflating the code size,
374 // manually insert the indirect branch.
375 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
376 if (Scavenged != AArch64::NoRegister &&
377 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
378 buildIndirectBranch(Scavenged, NewDestBB);
379 RS->setRegUsed(Scavenged);
380 return;
381 }
382
383 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
384 // with red zones.
385 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
386 if (!AFI || AFI->hasRedZone().value_or(true))
388 "Unable to insert indirect branch inside function that has red zone");
389
390 // Otherwise, spill X16 and defer range extension to the linker.
391 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
392 .addReg(AArch64::SP, RegState::Define)
393 .addReg(Reg)
394 .addReg(AArch64::SP)
395 .addImm(-16);
396
397 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
398
399 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
400 .addReg(AArch64::SP, RegState::Define)
401 .addReg(Reg, RegState::Define)
402 .addReg(AArch64::SP)
403 .addImm(16);
404}
405
406// Branch analysis.
407bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
408 MachineBasicBlock *&TBB,
409 MachineBasicBlock *&FBB,
410 SmallVectorImpl<MachineOperand> &Cond,
411 bool AllowModify) const {
412 // If the block has no terminators, it just falls into the block after it.
413 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
414 if (I == MBB.end())
415 return false;
416
417 // Skip over SpeculationBarrierEndBB terminators
418 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
419 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
420 --I;
421 }
422
423 if (!isUnpredicatedTerminator(*I))
424 return false;
425
426 // Get the last instruction in the block.
427 MachineInstr *LastInst = &*I;
428
429 // If there is only one terminator instruction, process it.
430 unsigned LastOpc = LastInst->getOpcode();
431 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
432 if (isUncondBranchOpcode(LastOpc)) {
433 TBB = LastInst->getOperand(0).getMBB();
434 return false;
435 }
436 if (isCondBranchOpcode(LastOpc)) {
437 // Block ends with fall-through condbranch.
438 parseCondBranch(LastInst, TBB, Cond);
439 return false;
440 }
441 return true; // Can't handle indirect branch.
442 }
443
444 // Get the instruction before it if it is a terminator.
445 MachineInstr *SecondLastInst = &*I;
446 unsigned SecondLastOpc = SecondLastInst->getOpcode();
447
448 // If AllowModify is true and the block ends with two or more unconditional
449 // branches, delete all but the first unconditional branch.
450 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
451 while (isUncondBranchOpcode(SecondLastOpc)) {
452 LastInst->eraseFromParent();
453 LastInst = SecondLastInst;
454 LastOpc = LastInst->getOpcode();
455 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
456 // Return now; the only terminator is an unconditional branch.
457 TBB = LastInst->getOperand(0).getMBB();
458 return false;
459 }
460 SecondLastInst = &*I;
461 SecondLastOpc = SecondLastInst->getOpcode();
462 }
463 }
464
465 // If we're allowed to modify and the block ends in an unconditional branch
466 // which could simply fallthrough, remove the branch. (Note: This case only
467 // matters when we can't understand the whole sequence, otherwise it's also
468 // handled by BranchFolding.cpp.)
469 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
470 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
471 LastInst->eraseFromParent();
472 LastInst = SecondLastInst;
473 LastOpc = LastInst->getOpcode();
474 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
475 assert(!isUncondBranchOpcode(LastOpc) &&
476 "unreachable unconditional branches removed above");
477
478 if (isCondBranchOpcode(LastOpc)) {
479 // Block ends with fall-through condbranch.
480 parseCondBranch(LastInst, TBB, Cond);
481 return false;
482 }
483 return true; // Can't handle indirect branch.
484 }
485 SecondLastInst = &*I;
486 SecondLastOpc = SecondLastInst->getOpcode();
487 }
488
489 // If there are three terminators, we don't know what sort of block this is.
490 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
491 return true;
492
493 // If the block ends with a B and a Bcc, handle it.
494 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
495 parseCondBranch(SecondLastInst, TBB, Cond);
496 FBB = LastInst->getOperand(0).getMBB();
497 return false;
498 }
499
500 // If the block ends with two unconditional branches, handle it. The second
501 // one is not executed, so remove it.
502 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
503 TBB = SecondLastInst->getOperand(0).getMBB();
504 I = LastInst;
505 if (AllowModify)
506 I->eraseFromParent();
507 return false;
508 }
509
510 // ...likewise if it ends with an indirect branch followed by an unconditional
511 // branch.
512 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
513 I = LastInst;
514 if (AllowModify)
515 I->eraseFromParent();
516 return true;
517 }
518
519 // Otherwise, can't handle this.
520 return true;
521}
522
523bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
524 MachineBranchPredicate &MBP,
525 bool AllowModify) const {
526 // Use analyzeBranch to validate the branch pattern.
527 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
528 SmallVector<MachineOperand, 4> Cond;
529 if (analyzeBranch(MBB, TBB, FBB, Cond, AllowModify))
530 return true;
531
532 // analyzeBranch returns success with empty Cond for unconditional branches.
533 if (Cond.empty())
534 return true;
535
536 MBP.TrueDest = TBB;
537 assert(MBP.TrueDest && "expected!");
538 MBP.FalseDest = FBB ? FBB : MBB.getNextNode();
539
540 MBP.ConditionDef = nullptr;
541 MBP.SingleUseCondition = false;
542
543 // Find the conditional branch. After analyzeBranch succeeds with non-empty
544 // Cond, there's exactly one conditional branch - either last (fallthrough)
545 // or second-to-last (followed by unconditional B).
546 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
547 if (I == MBB.end())
548 return true;
549
550 if (isUncondBranchOpcode(I->getOpcode())) {
551 if (I == MBB.begin())
552 return true;
553 --I;
554 }
555
556 MachineInstr *CondBranch = &*I;
557 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
558
559 switch (CondBranch->getOpcode()) {
560 default:
561 return true;
562
563 case AArch64::Bcc:
564 // Bcc takes the NZCV flag as the operand to branch on, walk up the
565 // instruction stream to find the last instruction to define NZCV.
567 if (MI.modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
568 MBP.ConditionDef = &MI;
569 break;
570 }
571 }
572 return false;
573
574 case AArch64::CBZW:
575 case AArch64::CBZX:
576 case AArch64::CBNZW:
577 case AArch64::CBNZX: {
578 MBP.LHS = CondBranch->getOperand(0);
579 MBP.RHS = MachineOperand::CreateImm(0);
580 unsigned Opc = CondBranch->getOpcode();
581 MBP.Predicate = (Opc == AArch64::CBNZX || Opc == AArch64::CBNZW)
582 ? MachineBranchPredicate::PRED_NE
583 : MachineBranchPredicate::PRED_EQ;
584 Register CondReg = MBP.LHS.getReg();
585 if (CondReg.isVirtual())
586 MBP.ConditionDef = MRI.getVRegDef(CondReg);
587 return false;
588 }
589
590 case AArch64::TBZW:
591 case AArch64::TBZX:
592 case AArch64::TBNZW:
593 case AArch64::TBNZX: {
594 Register CondReg = CondBranch->getOperand(0).getReg();
595 if (CondReg.isVirtual())
596 MBP.ConditionDef = MRI.getVRegDef(CondReg);
597 return false;
598 }
599 }
600}
601
602bool AArch64InstrInfo::reverseBranchCondition(
603 SmallVectorImpl<MachineOperand> &Cond) const {
604 if (Cond[0].getImm() != -1) {
605 // Regular Bcc
606 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
607 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
608 } else {
609 // Folded compare-and-branch
610 switch (Cond[1].getImm()) {
611 default:
612 llvm_unreachable("Unknown conditional branch!");
613 case AArch64::CBZW:
614 Cond[1].setImm(AArch64::CBNZW);
615 break;
616 case AArch64::CBNZW:
617 Cond[1].setImm(AArch64::CBZW);
618 break;
619 case AArch64::CBZX:
620 Cond[1].setImm(AArch64::CBNZX);
621 break;
622 case AArch64::CBNZX:
623 Cond[1].setImm(AArch64::CBZX);
624 break;
625 case AArch64::TBZW:
626 Cond[1].setImm(AArch64::TBNZW);
627 break;
628 case AArch64::TBNZW:
629 Cond[1].setImm(AArch64::TBZW);
630 break;
631 case AArch64::TBZX:
632 Cond[1].setImm(AArch64::TBNZX);
633 break;
634 case AArch64::TBNZX:
635 Cond[1].setImm(AArch64::TBZX);
636 break;
637
638 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
639 case AArch64::CBWPri:
640 case AArch64::CBXPri:
641 case AArch64::CBBAssertExt:
642 case AArch64::CBHAssertExt:
643 case AArch64::CBWPrr:
644 case AArch64::CBXPrr: {
645 // Pseudos using standard 4bit Arm condition codes
646 AArch64CC::CondCode CC =
647 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
648 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
649 }
650 }
651 }
652
653 return false;
654}
655
656unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
657 int *BytesRemoved) const {
658 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
659 if (I == MBB.end())
660 return 0;
661
662 if (!isUncondBranchOpcode(I->getOpcode()) &&
663 !isCondBranchOpcode(I->getOpcode()))
664 return 0;
665
666 // Remove the branch.
667 I->eraseFromParent();
668
669 I = MBB.end();
670
671 if (I == MBB.begin()) {
672 if (BytesRemoved)
673 *BytesRemoved = 4;
674 return 1;
675 }
676 --I;
677 if (!isCondBranchOpcode(I->getOpcode())) {
678 if (BytesRemoved)
679 *BytesRemoved = 4;
680 return 1;
681 }
682
683 // Remove the branch.
684 I->eraseFromParent();
685 if (BytesRemoved)
686 *BytesRemoved = 8;
687
688 return 2;
689}
690
691void AArch64InstrInfo::instantiateCondBranch(
692 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
693 ArrayRef<MachineOperand> Cond) const {
694 if (Cond[0].getImm() != -1) {
695 // Regular Bcc
696 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
697 } else {
698 // Folded compare-and-branch
699 // Note that we use addOperand instead of addReg to keep the flags.
700
701 // cbz, cbnz
702 const MachineInstrBuilder MIB =
703 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
704
705 // tbz/tbnz
706 if (Cond.size() > 3)
707 MIB.add(Cond[3]);
708
709 // cb
710 if (Cond.size() > 4)
711 MIB.add(Cond[4]);
712
713 MIB.addMBB(TBB);
714
715 // cb[b,h]
716 if (Cond.size() > 5) {
717 MIB.addImm(Cond[5].getImm());
718 MIB.addImm(Cond[6].getImm());
719 }
720 }
721}
722
723unsigned AArch64InstrInfo::insertBranch(
724 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
725 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
726 // Shouldn't be a fall through.
727 assert(TBB && "insertBranch must not be told to insert a fallthrough");
728
729 if (!FBB) {
730 if (Cond.empty()) // Unconditional branch?
731 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
732 else
733 instantiateCondBranch(MBB, DL, TBB, Cond);
734
735 if (BytesAdded)
736 *BytesAdded = 4;
737
738 return 1;
739 }
740
741 // Two-way conditional branch.
742 instantiateCondBranch(MBB, DL, TBB, Cond);
743 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
744
745 if (BytesAdded)
746 *BytesAdded = 8;
747
748 return 2;
749}
750
752 const TargetInstrInfo &TII) {
753 for (MachineInstr &MI : MBB->terminators()) {
754 unsigned Opc = MI.getOpcode();
755 switch (Opc) {
756 case AArch64::CBZW:
757 case AArch64::CBZX:
758 case AArch64::TBZW:
759 case AArch64::TBZX:
760 // CBZ/TBZ with WZR/XZR -> unconditional B
761 if (MI.getOperand(0).getReg() == AArch64::WZR ||
762 MI.getOperand(0).getReg() == AArch64::XZR) {
763 DEBUG_WITH_TYPE("optimizeTerminators",
764 dbgs() << "Removing always taken branch: " << MI);
765 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
766 SmallVector<MachineBasicBlock *> Succs(MBB->successors());
767 for (auto *S : Succs)
768 if (S != Target)
769 MBB->removeSuccessor(S);
770 DebugLoc DL = MI.getDebugLoc();
771 while (MBB->rbegin() != &MI)
772 MBB->rbegin()->eraseFromParent();
773 MI.eraseFromParent();
774 BuildMI(MBB, DL, TII.get(AArch64::B)).addMBB(Target);
775 return true;
776 }
777 break;
778 case AArch64::CBNZW:
779 case AArch64::CBNZX:
780 case AArch64::TBNZW:
781 case AArch64::TBNZX:
782 // CBNZ/TBNZ with WZR/XZR -> never taken, remove branch and successor
783 if (MI.getOperand(0).getReg() == AArch64::WZR ||
784 MI.getOperand(0).getReg() == AArch64::XZR) {
785 DEBUG_WITH_TYPE("optimizeTerminators",
786 dbgs() << "Removing never taken branch: " << MI);
787 MachineBasicBlock *Target = TII.getBranchDestBlock(MI);
788 MI.getParent()->removeSuccessor(Target);
789 MI.eraseFromParent();
790 return true;
791 }
792 break;
793 }
794 }
795 return false;
796}
797
798// Find the original register that VReg is copied from.
799static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
800 while (Register::isVirtualRegister(VReg)) {
801 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
802 if (!DefMI->isFullCopy())
803 return VReg;
804 VReg = DefMI->getOperand(1).getReg();
805 }
806 return VReg;
807}
808
809// Determine if VReg is defined by an instruction that can be folded into a
810// csel instruction. If so, return the folded opcode, and the replacement
811// register.
812static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
813 unsigned *NewReg = nullptr) {
814 VReg = removeCopies(MRI, VReg);
815 if (!Register::isVirtualRegister(VReg))
816 return 0;
817
818 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
819 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
820 unsigned Opc = 0;
821 unsigned SrcReg = 0;
822 switch (DefMI->getOpcode()) {
823 case AArch64::SUBREG_TO_REG:
824 // Check for the following way to define an 64-bit immediate:
825 // %0:gpr32 = MOVi32imm 1
826 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
827 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
828 return 0;
829 if (!DefMI->getOperand(2).isReg())
830 return 0;
831 if (!DefMI->getOperand(3).isImm() ||
832 DefMI->getOperand(3).getImm() != AArch64::sub_32)
833 return 0;
834 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
835 if (DefMI->getOpcode() != AArch64::MOVi32imm)
836 return 0;
837 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
838 return 0;
839 assert(Is64Bit);
840 SrcReg = AArch64::XZR;
841 Opc = AArch64::CSINCXr;
842 break;
843
844 case AArch64::MOVi32imm:
845 case AArch64::MOVi64imm:
846 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
847 return 0;
848 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
849 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
850 break;
851
852 case AArch64::ADDSXri:
853 case AArch64::ADDSWri:
854 // if NZCV is used, do not fold.
855 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
856 true) == -1)
857 return 0;
858 // fall-through to ADDXri and ADDWri.
859 [[fallthrough]];
860 case AArch64::ADDXri:
861 case AArch64::ADDWri:
862 // add x, 1 -> csinc.
863 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
864 DefMI->getOperand(3).getImm() != 0)
865 return 0;
866 SrcReg = DefMI->getOperand(1).getReg();
867 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
868 break;
869
870 case AArch64::ORNXrr:
871 case AArch64::ORNWrr: {
872 // not x -> csinv, represented as orn dst, xzr, src.
873 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
874 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
875 return 0;
876 SrcReg = DefMI->getOperand(2).getReg();
877 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
878 break;
879 }
880
881 case AArch64::SUBSXrr:
882 case AArch64::SUBSWrr:
883 // if NZCV is used, do not fold.
884 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
885 true) == -1)
886 return 0;
887 // fall-through to SUBXrr and SUBWrr.
888 [[fallthrough]];
889 case AArch64::SUBXrr:
890 case AArch64::SUBWrr: {
891 // neg x -> csneg, represented as sub dst, xzr, src.
892 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
893 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
894 return 0;
895 SrcReg = DefMI->getOperand(2).getReg();
896 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
897 break;
898 }
899 default:
900 return 0;
901 }
902 assert(Opc && SrcReg && "Missing parameters");
903
904 if (NewReg)
905 *NewReg = SrcReg;
906 return Opc;
907}
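// In summary, the patterns recognized above are: a materialized constant 1
// (CSINC of WZR/XZR), an add of #1 (CSINC of the addend), an ORN with the
// zero register, i.e. a bitwise NOT (CSINV), and a SUB from the zero
// register, i.e. a negation (CSNEG). The flag-setting ADDS/SUBS forms are
// only folded when their NZCV definition is dead.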
908
909bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
910 ArrayRef<MachineOperand> Cond,
911 Register DstReg, Register TrueReg,
912 Register FalseReg, int &CondCycles,
913 int &TrueCycles,
914 int &FalseCycles) const {
915 // Check register classes.
916 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
917 const TargetRegisterClass *RC =
918 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
919 if (!RC)
920 return false;
921
922 // Also need to check the dest regclass, in case we're trying to optimize
923 // something like:
924 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
925 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
926 return false;
927
928 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
929 unsigned ExtraCondLat = Cond.size() != 1;
930
931 // GPRs are handled by csel.
932 // FIXME: Fold in x+1, -x, and ~x when applicable.
933 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
934 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
935 // Single-cycle csel, csinc, csinv, and csneg.
936 CondCycles = 1 + ExtraCondLat;
937 TrueCycles = FalseCycles = 1;
938 if (canFoldIntoCSel(MRI, TrueReg))
939 TrueCycles = 0;
940 else if (canFoldIntoCSel(MRI, FalseReg))
941 FalseCycles = 0;
942 return true;
943 }
944
945 // Scalar floating point is handled by fcsel.
946 // FIXME: Form fabs, fmin, and fmax when applicable.
947 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
948 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
949 CondCycles = 5 + ExtraCondLat;
950 TrueCycles = FalseCycles = 2;
951 return true;
952 }
953
954 // Can't do vectors.
955 return false;
956}
957
958void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
959 MachineBasicBlock::iterator I,
960 const DebugLoc &DL, Register DstReg,
961 ArrayRef<MachineOperand> Cond,
962 Register TrueReg, Register FalseReg) const {
963 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
964
965 // Parse the condition code, see parseCondBranch() above.
966 AArch64CC::CondCode CC;
967 switch (Cond.size()) {
968 default:
969 llvm_unreachable("Unknown condition opcode in Cond");
970 case 1: // b.cc
971 CC = AArch64CC::CondCode(Cond[0].getImm());
972 break;
973 case 3: { // cbz/cbnz
974 // We must insert a compare against 0.
975 bool Is64Bit;
976 switch (Cond[1].getImm()) {
977 default:
978 llvm_unreachable("Unknown branch opcode in Cond");
979 case AArch64::CBZW:
980 Is64Bit = false;
981 CC = AArch64CC::EQ;
982 break;
983 case AArch64::CBZX:
984 Is64Bit = true;
985 CC = AArch64CC::EQ;
986 break;
987 case AArch64::CBNZW:
988 Is64Bit = false;
989 CC = AArch64CC::NE;
990 break;
991 case AArch64::CBNZX:
992 Is64Bit = true;
993 CC = AArch64CC::NE;
994 break;
995 }
996 Register SrcReg = Cond[2].getReg();
997 if (Is64Bit) {
998 // cmp reg, #0 is actually subs xzr, reg, #0.
999 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
1000 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
1001 .addReg(SrcReg)
1002 .addImm(0)
1003 .addImm(0);
1004 } else {
1005 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
1006 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
1007 .addReg(SrcReg)
1008 .addImm(0)
1009 .addImm(0);
1010 }
1011 break;
1012 }
1013 case 4: { // tbz/tbnz
1014 // We must insert a tst instruction.
1015 switch (Cond[1].getImm()) {
1016 default:
1017 llvm_unreachable("Unknown branch opcode in Cond");
1018 case AArch64::TBZW:
1019 case AArch64::TBZX:
1020 CC = AArch64CC::EQ;
1021 break;
1022 case AArch64::TBNZW:
1023 case AArch64::TBNZX:
1024 CC = AArch64CC::NE;
1025 break;
1026 }
1027 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
1028 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
1029 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
1030 .addReg(Cond[2].getReg())
1031 .addImm(
1032 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
1033 else
1034 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
1035 .addReg(Cond[2].getReg())
1036 .addImm(
1037 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
1038 break;
1039 }
1040 case 5: { // cb
1041 // We must insert a cmp, that is a subs
1042 // 0 1 2 3 4
1043 // Cond is { -1, Opcode, CC, Op0, Op1 }
1044
1045 unsigned SubsOpc, SubsDestReg;
1046 bool IsImm = false;
1047 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1048 switch (Cond[1].getImm()) {
1049 default:
1050 llvm_unreachable("Unknown branch opcode in Cond");
1051 case AArch64::CBWPri:
1052 SubsOpc = AArch64::SUBSWri;
1053 SubsDestReg = AArch64::WZR;
1054 IsImm = true;
1055 break;
1056 case AArch64::CBXPri:
1057 SubsOpc = AArch64::SUBSXri;
1058 SubsDestReg = AArch64::XZR;
1059 IsImm = true;
1060 break;
1061 case AArch64::CBWPrr:
1062 SubsOpc = AArch64::SUBSWrr;
1063 SubsDestReg = AArch64::WZR;
1064 IsImm = false;
1065 break;
1066 case AArch64::CBXPrr:
1067 SubsOpc = AArch64::SUBSXrr;
1068 SubsDestReg = AArch64::XZR;
1069 IsImm = false;
1070 break;
1071 }
1072
1073 if (IsImm)
1074 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1075 .addReg(Cond[3].getReg())
1076 .addImm(Cond[4].getImm())
1077 .addImm(0);
1078 else
1079 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
1080 .addReg(Cond[3].getReg())
1081 .addReg(Cond[4].getReg());
1082 } break;
1083 case 7: { // cb[b,h]
1084 // We must insert a cmp, that is a subs, but also zero- or sign-extensions
1085 // that have been folded. For the first operand we codegen an explicit
1086 // extension, for the second operand we fold the extension into cmp.
1087 // 0 1 2 3 4 5 6
1088 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
1089
1090 // We need a new register for the now explicitly extended register
1091 Register Reg = Cond[4].getReg();
1093 unsigned ExtOpc;
1094 unsigned ExtBits;
1095 AArch64_AM::ShiftExtendType ExtendType =
1097 switch (ExtendType) {
1098 default:
1099 llvm_unreachable("Unknown shift-extend for CB instruction");
1100 case AArch64_AM::SXTB:
1101 assert(
1102 Cond[1].getImm() == AArch64::CBBAssertExt &&
1103 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1104 ExtOpc = AArch64::SBFMWri;
1105 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1106 break;
1107 case AArch64_AM::SXTH:
1108 assert(
1109 Cond[1].getImm() == AArch64::CBHAssertExt &&
1110 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1111 ExtOpc = AArch64::SBFMWri;
1112 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1113 break;
1114 case AArch64_AM::UXTB:
1115 assert(
1116 Cond[1].getImm() == AArch64::CBBAssertExt &&
1117 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1118 ExtOpc = AArch64::ANDWri;
1119 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1120 break;
1121 case AArch64_AM::UXTH:
1122 assert(
1123 Cond[1].getImm() == AArch64::CBHAssertExt &&
1124 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1125 ExtOpc = AArch64::ANDWri;
1126 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1127 break;
1128 }
1129
1130 // Build the explicit extension of the first operand
1131 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
1132 MachineInstrBuilder MBBI =
1133 BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
1134 if (ExtOpc != AArch64::ANDWri)
1135 MBBI.addImm(0);
1136 MBBI.addImm(ExtBits);
1137 }
1138
1139 // Now, subs with an extended second operand
1141 AArch64_AM::ShiftExtendType ExtendType =
1143 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1144 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1145 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1146 .addReg(Cond[3].getReg())
1147 .addReg(Reg)
1148 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1149 } // If no extension is needed, just a regular subs
1150 else {
1151 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1152 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1153 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1154 .addReg(Cond[3].getReg())
1155 .addReg(Reg);
1156 }
1157
1158 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1159 } break;
1160 }
1161
1162 unsigned Opc = 0;
1163 const TargetRegisterClass *RC = nullptr;
1164 bool TryFold = false;
1165 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1166 RC = &AArch64::GPR64RegClass;
1167 Opc = AArch64::CSELXr;
1168 TryFold = true;
1169 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1170 RC = &AArch64::GPR32RegClass;
1171 Opc = AArch64::CSELWr;
1172 TryFold = true;
1173 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1174 RC = &AArch64::FPR64RegClass;
1175 Opc = AArch64::FCSELDrrr;
1176 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1177 RC = &AArch64::FPR32RegClass;
1178 Opc = AArch64::FCSELSrrr;
1179 }
1180 assert(RC && "Unsupported regclass");
1181
1182 // Try folding simple instructions into the csel.
1183 if (TryFold) {
1184 unsigned NewReg = 0;
1185 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1186 if (FoldedOpc) {
1187 // The folded opcodes csinc, csinv and csneg apply the operation to
1188 // FalseReg, so we need to invert the condition.
1189 CC = AArch64CC::getInvertedCondCode(CC);
1190 TrueReg = FalseReg;
1191 } else
1192 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1193
1194 // Fold the operation. Leave any dead instructions for DCE to clean up.
1195 if (FoldedOpc) {
1196 FalseReg = NewReg;
1197 Opc = FoldedOpc;
1198 // Extend the live range of NewReg.
1199 MRI.clearKillFlags(NewReg);
1200 }
1201 }
1202
1203 // Pull all virtual registers into the appropriate class.
1204 MRI.constrainRegClass(TrueReg, RC);
1205 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1206 assert(
1207 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1208 FalseReg == AArch64::XZR) &&
1209 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1210 if (FalseReg.isVirtual())
1211 MRI.constrainRegClass(FalseReg, RC);
1212
1213 // Insert the csel.
1214 BuildMI(MBB, I, DL, get(Opc), DstReg)
1215 .addReg(TrueReg)
1216 .addReg(FalseReg)
1217 .addImm(CC);
1218}
1219
1220// Return true if Imm can be loaded into a register by a "cheap" sequence of
1221// instructions. For now, "cheap" means at most two instructions.
1222static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1223 if (BitSize == 32)
1224 return true;
1225
1226 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1227 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1228 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1229 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1230
1231 return Is.size() <= 2;
1232}
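// For example, 0x0000ffff00000000 (a single MOVZ with shift) is cheap, as is
// any 32-bit immediate, whereas a 64-bit constant such as 0x123456789abcdef0
// that needs a MOVZ plus three MOVKs is not.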
1233
1234// Check if a COPY instruction is cheap.
1235static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1236 assert(MI.isCopy() && "Expected COPY instruction");
1237 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1238
1239 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1240 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1241 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1242 if (Reg.isVirtual())
1243 return MRI.getRegClass(Reg);
1244 if (Reg.isPhysical())
1245 return RI.getMinimalPhysRegClass(Reg);
1246 return nullptr;
1247 };
1248 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1249 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1250 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1251 return false;
1252
1253 return MI.isAsCheapAsAMove();
1254}
1255
1256// FIXME: this implementation should be micro-architecture dependent, so a
1257// micro-architecture target hook should be introduced here in future.
1258bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1259 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1260 if (isExynosCheapAsMove(MI))
1261 return true;
1262 return MI.isAsCheapAsAMove();
1263 }
1264
1265 switch (MI.getOpcode()) {
1266 default:
1267 return MI.isAsCheapAsAMove();
1268
1269 case TargetOpcode::COPY:
1270 return isCheapCopy(MI, RI);
1271
1272 case AArch64::ADDWrs:
1273 case AArch64::ADDXrs:
1274 case AArch64::SUBWrs:
1275 case AArch64::SUBXrs:
1276 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1277
1278 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1279 // ORRXri, it is as cheap as MOV.
1280 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1281 case AArch64::MOVi32imm:
1282 return isCheapImmediate(MI, 32);
1283 case AArch64::MOVi64imm:
1284 return isCheapImmediate(MI, 64);
1285 }
1286}
1287
1288bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1289 switch (MI.getOpcode()) {
1290 default:
1291 return false;
1292
1293 case AArch64::ADDWrs:
1294 case AArch64::ADDXrs:
1295 case AArch64::ADDSWrs:
1296 case AArch64::ADDSXrs: {
1297 unsigned Imm = MI.getOperand(3).getImm();
1298 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1299 if (ShiftVal == 0)
1300 return true;
1301 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1302 }
1303
1304 case AArch64::ADDWrx:
1305 case AArch64::ADDXrx:
1306 case AArch64::ADDXrx64:
1307 case AArch64::ADDSWrx:
1308 case AArch64::ADDSXrx:
1309 case AArch64::ADDSXrx64: {
1310 unsigned Imm = MI.getOperand(3).getImm();
1311 switch (AArch64_AM::getArithExtendType(Imm)) {
1312 default:
1313 return false;
1314 case AArch64_AM::UXTB:
1315 case AArch64_AM::UXTH:
1316 case AArch64_AM::UXTW:
1317 case AArch64_AM::UXTX:
1318 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1319 }
1320 }
1321
1322 case AArch64::SUBWrs:
1323 case AArch64::SUBSWrs: {
1324 unsigned Imm = MI.getOperand(3).getImm();
1325 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1326 return ShiftVal == 0 ||
1327 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1328 }
1329
1330 case AArch64::SUBXrs:
1331 case AArch64::SUBSXrs: {
1332 unsigned Imm = MI.getOperand(3).getImm();
1333 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1334 return ShiftVal == 0 ||
1335 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1336 }
1337
1338 case AArch64::SUBWrx:
1339 case AArch64::SUBXrx:
1340 case AArch64::SUBXrx64:
1341 case AArch64::SUBSWrx:
1342 case AArch64::SUBSXrx:
1343 case AArch64::SUBSXrx64: {
1344 unsigned Imm = MI.getOperand(3).getImm();
1345 switch (AArch64_AM::getArithExtendType(Imm)) {
1346 default:
1347 return false;
1348 case AArch64_AM::UXTB:
1349 case AArch64_AM::UXTH:
1350 case AArch64_AM::UXTW:
1351 case AArch64_AM::UXTX:
1352 return AArch64_AM::getArithShiftValue(Imm) == 0;
1353 }
1354 }
1355
1356 case AArch64::LDRBBroW:
1357 case AArch64::LDRBBroX:
1358 case AArch64::LDRBroW:
1359 case AArch64::LDRBroX:
1360 case AArch64::LDRDroW:
1361 case AArch64::LDRDroX:
1362 case AArch64::LDRHHroW:
1363 case AArch64::LDRHHroX:
1364 case AArch64::LDRHroW:
1365 case AArch64::LDRHroX:
1366 case AArch64::LDRQroW:
1367 case AArch64::LDRQroX:
1368 case AArch64::LDRSBWroW:
1369 case AArch64::LDRSBWroX:
1370 case AArch64::LDRSBXroW:
1371 case AArch64::LDRSBXroX:
1372 case AArch64::LDRSHWroW:
1373 case AArch64::LDRSHWroX:
1374 case AArch64::LDRSHXroW:
1375 case AArch64::LDRSHXroX:
1376 case AArch64::LDRSWroW:
1377 case AArch64::LDRSWroX:
1378 case AArch64::LDRSroW:
1379 case AArch64::LDRSroX:
1380 case AArch64::LDRWroW:
1381 case AArch64::LDRWroX:
1382 case AArch64::LDRXroW:
1383 case AArch64::LDRXroX:
1384 case AArch64::PRFMroW:
1385 case AArch64::PRFMroX:
1386 case AArch64::STRBBroW:
1387 case AArch64::STRBBroX:
1388 case AArch64::STRBroW:
1389 case AArch64::STRBroX:
1390 case AArch64::STRDroW:
1391 case AArch64::STRDroX:
1392 case AArch64::STRHHroW:
1393 case AArch64::STRHHroX:
1394 case AArch64::STRHroW:
1395 case AArch64::STRHroX:
1396 case AArch64::STRQroW:
1397 case AArch64::STRQroX:
1398 case AArch64::STRSroW:
1399 case AArch64::STRSroX:
1400 case AArch64::STRWroW:
1401 case AArch64::STRWroX:
1402 case AArch64::STRXroW:
1403 case AArch64::STRXroX: {
1404 unsigned IsSigned = MI.getOperand(3).getImm();
1405 return !IsSigned;
1406 }
1407 }
1408}
1409
1410bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1411 unsigned Opc = MI.getOpcode();
1412 switch (Opc) {
1413 default:
1414 return false;
1415 case AArch64::SEH_StackAlloc:
1416 case AArch64::SEH_SaveFPLR:
1417 case AArch64::SEH_SaveFPLR_X:
1418 case AArch64::SEH_SaveReg:
1419 case AArch64::SEH_SaveReg_X:
1420 case AArch64::SEH_SaveRegP:
1421 case AArch64::SEH_SaveRegP_X:
1422 case AArch64::SEH_SaveFReg:
1423 case AArch64::SEH_SaveFReg_X:
1424 case AArch64::SEH_SaveFRegP:
1425 case AArch64::SEH_SaveFRegP_X:
1426 case AArch64::SEH_SetFP:
1427 case AArch64::SEH_AddFP:
1428 case AArch64::SEH_Nop:
1429 case AArch64::SEH_PrologEnd:
1430 case AArch64::SEH_EpilogStart:
1431 case AArch64::SEH_EpilogEnd:
1432 case AArch64::SEH_PACSignLR:
1433 case AArch64::SEH_SaveAnyRegI:
1434 case AArch64::SEH_SaveAnyRegIP:
1435 case AArch64::SEH_SaveAnyRegQP:
1436 case AArch64::SEH_SaveAnyRegQPX:
1437 case AArch64::SEH_AllocZ:
1438 case AArch64::SEH_SaveZReg:
1439 case AArch64::SEH_SavePReg:
1440 return true;
1441 }
1442}
1443
1445 Register &SrcReg, Register &DstReg,
1446 unsigned &SubIdx) const {
1447 switch (MI.getOpcode()) {
1448 default:
1449 return false;
1450 case AArch64::SBFMXri: // aka sxtw
1451 case AArch64::UBFMXri: // aka uxtw
1452 // Check for the 32 -> 64 bit extension case, these instructions can do
1453 // much more.
1454 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1455 return false;
1456 // This is a signed or unsigned 32 -> 64 bit extension.
1457 SrcReg = MI.getOperand(1).getReg();
1458 DstReg = MI.getOperand(0).getReg();
1459 SubIdx = AArch64::sub_32;
1460 return true;
1461 }
1462}
1463
1464bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1465 const MachineInstr &MIa, const MachineInstr &MIb) const {
1466 const TargetRegisterInfo *TRI = &getRegisterInfo();
1467 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1468 int64_t OffsetA = 0, OffsetB = 0;
1469 TypeSize WidthA(0, false), WidthB(0, false);
1470 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1471
1472 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1473 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1474
1475 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1476 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1477 return false;
1478
1479 // Retrieve the base, offset from the base and width. Width
1480 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1481 // base are identical, and the offset of a lower memory access +
1482 // the width doesn't overlap the offset of a higher memory access,
1483 // then the memory accesses are different.
1484 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1485 // are assumed to have the same scale (vscale).
1486 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1487 WidthA, TRI) &&
1488 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1489 WidthB, TRI)) {
1490 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1491 OffsetAIsScalable == OffsetBIsScalable) {
1492 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1493 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1494 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1495 if (LowWidth.isScalable() == OffsetAIsScalable &&
1496 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1497 return true;
1498 }
1499 }
1500 return false;
1501}
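// For example, two loads from the same base register whose byte ranges
// [Offset, Offset + Width) do not overlap are reported as trivially disjoint,
// while accesses that mix scalable (SVE) and fixed-size offsets are
// conservatively treated as possibly aliasing.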
1502
1503bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1504 const MachineBasicBlock *MBB,
1505 const MachineFunction &MF) const {
1506 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1507 return true;
1508
1509 // Do not move an instruction that can be recognized as a branch target.
1510 if (hasBTISemantics(MI))
1511 return true;
1512
1513 switch (MI.getOpcode()) {
1514 case AArch64::HINT:
1515 // CSDB hints are scheduling barriers.
1516 if (MI.getOperand(0).getImm() == 0x14)
1517 return true;
1518 break;
1519 case AArch64::DSB:
1520 case AArch64::ISB:
1521 // DSB and ISB also are scheduling barriers.
1522 return true;
1523 case AArch64::MSRpstatesvcrImm1:
1524 // SMSTART and SMSTOP are also scheduling barriers.
1525 return true;
1526 default:;
1527 }
1528 if (isSEHInstruction(MI))
1529 return true;
1530 auto Next = std::next(MI.getIterator());
1531 return Next != MBB->end() && Next->isCFIInstruction();
1532}
1533
1534/// analyzeCompare - For a comparison instruction, return the source registers
1535/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1536/// Return true if the comparison instruction can be analyzed.
1537bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1538 Register &SrcReg2, int64_t &CmpMask,
1539 int64_t &CmpValue) const {
1540 // The first operand can be a frame index where we'd normally expect a
1541 // register.
1542 // FIXME: Pass subregisters out of analyzeCompare
1543 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1544 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1545 return false;
1546
1547 switch (MI.getOpcode()) {
1548 default:
1549 break;
1550 case AArch64::PTEST_PP:
1551 case AArch64::PTEST_PP_ANY:
1552 case AArch64::PTEST_PP_FIRST:
1553 SrcReg = MI.getOperand(0).getReg();
1554 SrcReg2 = MI.getOperand(1).getReg();
1555 if (MI.getOperand(2).getSubReg())
1556 return false;
1557
1558 // Not sure about the mask and value for now...
1559 CmpMask = ~0;
1560 CmpValue = 0;
1561 return true;
1562 case AArch64::SUBSWrr:
1563 case AArch64::SUBSWrs:
1564 case AArch64::SUBSWrx:
1565 case AArch64::SUBSXrr:
1566 case AArch64::SUBSXrs:
1567 case AArch64::SUBSXrx:
1568 case AArch64::ADDSWrr:
1569 case AArch64::ADDSWrs:
1570 case AArch64::ADDSWrx:
1571 case AArch64::ADDSXrr:
1572 case AArch64::ADDSXrs:
1573 case AArch64::ADDSXrx:
1574 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1575 SrcReg = MI.getOperand(1).getReg();
1576 SrcReg2 = MI.getOperand(2).getReg();
1577
1578 // FIXME: Pass subregisters out of analyzeCompare
1579 if (MI.getOperand(2).getSubReg())
1580 return false;
1581
1582 CmpMask = ~0;
1583 CmpValue = 0;
1584 return true;
1585 case AArch64::SUBSWri:
1586 case AArch64::ADDSWri:
1587 case AArch64::SUBSXri:
1588 case AArch64::ADDSXri:
1589 SrcReg = MI.getOperand(1).getReg();
1590 SrcReg2 = 0;
1591 CmpMask = ~0;
1592 CmpValue = MI.getOperand(2).getImm();
1593 return true;
1594 case AArch64::ANDSWri:
1595 case AArch64::ANDSXri:
1596 // ANDS does not use the same encoding scheme as the others xxxS
1597 // instructions.
1598 SrcReg = MI.getOperand(1).getReg();
1599 SrcReg2 = 0;
1600 CmpMask = ~0;
1601 CmpValue = AArch64_AM::decodeLogicalImmediate(
1602 MI.getOperand(2).getImm(),
1603 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1604 return true;
1605 }
1606
1607 return false;
1608}
1609
1610static bool UpdateOperandRegClass(MachineInstr &Instr) {
1611 MachineBasicBlock *MBB = Instr.getParent();
1612 assert(MBB && "Can't get MachineBasicBlock here");
1613 MachineFunction *MF = MBB->getParent();
1614 assert(MF && "Can't get MachineFunction here");
1615 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1616 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1617 MachineRegisterInfo *MRI = &MF->getRegInfo();
1618
1619 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1620 ++OpIdx) {
1621 MachineOperand &MO = Instr.getOperand(OpIdx);
1622 const TargetRegisterClass *OpRegCstraints =
1623 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1624
1625 // If there's no constraint, there's nothing to do.
1626 if (!OpRegCstraints)
1627 continue;
1628 // If the operand is a frame index, there's nothing to do here.
1629 // A frame index operand will resolve correctly during PEI.
1630 if (MO.isFI())
1631 continue;
1632
1633 assert(MO.isReg() &&
1634 "Operand has register constraints without being a register!");
1635
1636 Register Reg = MO.getReg();
1637 if (Reg.isPhysical()) {
1638 if (!OpRegCstraints->contains(Reg))
1639 return false;
1640 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1641 !MRI->constrainRegClass(Reg, OpRegCstraints))
1642 return false;
1643 }
1644
1645 return true;
1646}
1647
1648/// Return the opcode that does not set flags when possible - otherwise
1649/// return the original opcode. The caller is responsible to do the actual
1650/// substitution and legality checking.
1651unsigned AArch64InstrInfo::convertToFlagSettingOpc(const MachineInstr &MI) {
1652 // Don't convert all compare instructions, because for some the zero register
1653 // encoding becomes the sp register.
1654 bool MIDefinesZeroReg = false;
1655 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1656 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1657 MIDefinesZeroReg = true;
1658
1659 switch (MI.getOpcode()) {
1660 default:
1661 return MI.getOpcode();
1662 case AArch64::ADDSWrr:
1663 return AArch64::ADDWrr;
1664 case AArch64::ADDSWri:
1665 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1666 case AArch64::ADDSWrs:
1667 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1668 case AArch64::ADDSWrx:
1669 return AArch64::ADDWrx;
1670 case AArch64::ADDSXrr:
1671 return AArch64::ADDXrr;
1672 case AArch64::ADDSXri:
1673 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1674 case AArch64::ADDSXrs:
1675 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1676 case AArch64::ADDSXrx:
1677 return AArch64::ADDXrx;
1678 case AArch64::SUBSWrr:
1679 return AArch64::SUBWrr;
1680 case AArch64::SUBSWri:
1681 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1682 case AArch64::SUBSWrs:
1683 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1684 case AArch64::SUBSWrx:
1685 return AArch64::SUBWrx;
1686 case AArch64::SUBSXrr:
1687 return AArch64::SUBXrr;
1688 case AArch64::SUBSXri:
1689 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1690 case AArch64::SUBSXrs:
1691 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1692 case AArch64::SUBSXrx:
1693 return AArch64::SUBXrx;
1694 }
1695}
1696
1697enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1698
1699/// True when condition flags are accessed (either by writing or reading)
1700/// on the instruction trace starting at From and ending at To.
1701///
1702/// Note: If From and To are from different blocks it's assumed CC are accessed
1703/// on the path.
1704static bool areCFlagsAccessedBetweenInstrs(
1705 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1706 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1707 // Early exit if To is at the beginning of the BB.
1708 if (To == To->getParent()->begin())
1709 return true;
1710
1711 // Check whether the instructions are in the same basic block
1712 // If not, assume the condition flags might get modified somewhere.
1713 if (To->getParent() != From->getParent())
1714 return true;
1715
1716 // From must be above To.
1717 assert(std::any_of(
1718 ++To.getReverse(), To->getParent()->rend(),
1719 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1720
1721 // We iterate backward starting at \p To until we hit \p From.
1722 for (const MachineInstr &Instr :
1723 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1724 if (((AccessToCheck & AK_Write) &&
1725 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1726 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1727 return true;
1728 }
1729 return false;
1730}
1731
1732std::optional<unsigned>
1733AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1734 MachineInstr *Pred,
1735 const MachineRegisterInfo *MRI) const {
1736 unsigned MaskOpcode = Mask->getOpcode();
1737 unsigned PredOpcode = Pred->getOpcode();
1738 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1739 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1740
1741 if (PredIsWhileLike) {
1742 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1743 // instruction and the condition is "any" since WHILEcc does an implicit
1744 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1745 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1746 return PredOpcode;
1747
1748 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1749 // redundant since WHILE performs an implicit PTEST with an all active
1750 // mask.
1751 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1752 getElementSizeForOpcode(MaskOpcode) ==
1753 getElementSizeForOpcode(PredOpcode))
1754 return PredOpcode;
1755
1756 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1757 // WHILEcc performs an implicit PTEST with an all active mask, setting
1758 // the N flag as the PTEST_FIRST would.
1759 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1760 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1761 return PredOpcode;
1762
1763 return {};
1764 }
1765
1766 if (PredIsPTestLike) {
1767 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1768 // instruction that sets the flags as PTEST would and the condition is
1769 // "any" since PG is always a subset of the governing predicate of the
1770 // ptest-like instruction.
1771 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1772 return PredOpcode;
1773
1774 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1775
1776 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1777 // to look through a copy and try again. This is because some instructions
1778 // take a predicate whose register class is a subset of its result class.
1779 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1780 PTestLikeMask->getOperand(1).getReg().isVirtual())
1781 PTestLikeMask =
1782 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1783
1784 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1785 // element size matches and either the PTEST_LIKE instruction uses
1786 // the same all active mask or the condition is "any".
1787 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1788 getElementSizeForOpcode(MaskOpcode) ==
1789 getElementSizeForOpcode(PredOpcode)) {
1790 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1791 return PredOpcode;
1792 }
1793
1794 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1795 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1796 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1797 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1798 // performed by the compare could consider fewer lanes for these element
1799 // sizes.
1800 //
1801 // For example, consider
1802 //
1803 // ptrue p0.b ; P0=1111-1111-1111-1111
1804 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1805 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1806 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1807 // ; ^ last active
1808 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1809 // ; ^ last active
1810 //
1811 // where the compare generates a canonical all active 32-bit predicate
1812 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1813 // active flag, whereas the PTEST instruction with the same mask doesn't.
1814 // For PTEST_ANY this doesn't apply as the flags in this case would be
1815 // identical regardless of element size.
1816 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1817 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1818 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1819 return PredOpcode;
1820
1821 return {};
1822 }
1823
1824 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1825 // opcode so the PTEST becomes redundant.
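// For illustration only (hypothetical registers), a predicate AND followed by
// a PTEST on the same governing predicate:
//   and   p1.b, p0/z, p2.b, p3.b
//   ptest p0, p1.b
// can be rewritten to use the flag-setting form, making the PTEST redundant:
//   ands  p1.b, p0/z, p2.b, p3.b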
1826 switch (PredOpcode) {
1827 case AArch64::AND_PPzPP:
1828 case AArch64::BIC_PPzPP:
1829 case AArch64::EOR_PPzPP:
1830 case AArch64::NAND_PPzPP:
1831 case AArch64::NOR_PPzPP:
1832 case AArch64::ORN_PPzPP:
1833 case AArch64::ORR_PPzPP:
1834 case AArch64::BRKA_PPzP:
1835 case AArch64::BRKPA_PPzPP:
1836 case AArch64::BRKB_PPzP:
1837 case AArch64::BRKPB_PPzPP:
1838 case AArch64::RDFFR_PPz: {
1839 // Check to see if our mask is the same. If not, the resulting flag bits
1840 // may be different and we can't remove the ptest.
1841 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1842 if (Mask != PredMask)
1843 return {};
1844 break;
1845 }
1846 case AArch64::BRKN_PPzP: {
1847 // BRKN uses an all active implicit mask to set flags unlike the other
1848 // flag-setting instructions.
1849 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1850 if ((MaskOpcode != AArch64::PTRUE_B) ||
1851 (Mask->getOperand(1).getImm() != 31))
1852 return {};
1853 break;
1854 }
1855 case AArch64::PTRUE_B:
1856 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1857 break;
1858 default:
1859 // Bail out if we don't recognize the input
1860 return {};
1861 }
1862
1863 return convertToFlagSettingOpc(PredOpcode);
1864}
1865
1866/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1867/// operation which could set the flags in an identical manner
1868bool AArch64InstrInfo::optimizePTestInstr(
1869 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1870 const MachineRegisterInfo *MRI) const {
1871 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1872 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1873
1874 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1875 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1876 // before the branch to extract each subregister.
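// A sketch of the expected input (names are illustrative, not exact opcodes):
//   %pair = WHILELO (two-register form)   ; defines a pair of predicates
//   %p    = COPY %pair.psub0              ; extract the first predicate
//   PTEST_PP_FIRST %mask, %p
// Looking through the COPY lets Pred refer to the WHILEcc itself.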
1877 auto Op = Pred->getOperand(1);
1878 if (Op.isReg() && Op.getReg().isVirtual() &&
1879 Op.getSubReg() == AArch64::psub0)
1880 Pred = MRI->getUniqueVRegDef(Op.getReg());
1881 }
1882
1883 unsigned PredOpcode = Pred->getOpcode();
1884 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1885 if (!NewOp)
1886 return false;
1887
1888 const TargetRegisterInfo *TRI = &getRegisterInfo();
1889
1890 // If another instruction between Pred and PTest accesses flags, don't remove
1891 // the ptest or update the earlier instruction to modify them.
1892 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1893 return false;
1894
1895 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1896 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1897 // operand to be replaced with an equivalent instruction that also sets the
1898 // flags.
1899 PTest->eraseFromParent();
1900 if (*NewOp != PredOpcode) {
1901 Pred->setDesc(get(*NewOp));
1902 bool succeeded = UpdateOperandRegClass(*Pred);
1903 (void)succeeded;
1904 assert(succeeded && "Operands have incompatible register classes!");
1905 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1906 }
1907
1908 // Ensure that the flags def is live.
1909 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1910 unsigned i = 0, e = Pred->getNumOperands();
1911 for (; i != e; ++i) {
1912 MachineOperand &MO = Pred->getOperand(i);
1913 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1914 MO.setIsDead(false);
1915 break;
1916 }
1917 }
1918 }
1919 return true;
1920}
1921
1922/// Try to optimize a compare instruction. A compare instruction is an
1923/// instruction which produces AArch64::NZCV. It is a true compare
1924/// instruction when there are no uses of its destination register.
1926///
1927/// The following steps are tried in order:
1928/// 1. Convert CmpInstr into an unconditional version.
1929/// 2. Remove CmpInstr if there is an earlier instruction producing a needed
1930///    condition code, or an instruction which can be converted into such an
1931/// instruction.
1932/// Only comparison with zero is supported.
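///
/// For illustration (a sketch, not exact MIR), step 1 turns
/// \code
///   subs w8, w9, #1        ; NZCV is never read
/// \endcode
/// into
/// \code
///   sub  w8, w9, #1
/// \endcode
/// and step 2 removes a 'subs wzr, w8, #0' (i.e. 'cmp w8, #0') when an earlier
/// instruction already produces, or can be converted to produce, the needed
/// flags.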
1933bool AArch64InstrInfo::optimizeCompareInstr(
1934 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1935 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1936 assert(CmpInstr.getParent());
1937 assert(MRI);
1938
1939 // Replace a flag-setting opcode such as SUBSWrr with SUBWrr if NZCV is not used.
1940 int DeadNZCVIdx =
1941 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1942 if (DeadNZCVIdx != -1) {
1943 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1944 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1945 CmpInstr.eraseFromParent();
1946 return true;
1947 }
1948 unsigned Opc = CmpInstr.getOpcode();
1949 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1950 if (NewOpc == Opc)
1951 return false;
1952 const MCInstrDesc &MCID = get(NewOpc);
1953 CmpInstr.setDesc(MCID);
1954 CmpInstr.removeOperand(DeadNZCVIdx);
1955 bool succeeded = UpdateOperandRegClass(CmpInstr);
1956 (void)succeeded;
1957 assert(succeeded && "Some operands reg class are incompatible!");
1958 return true;
1959 }
1960
1961 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1962 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1963 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1964 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1965
1966 if (SrcReg2 != 0)
1967 return false;
1968
1969 // CmpInstr is a Compare instruction if destination register is not used.
1970 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1971 return false;
1972
1973 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1974 return true;
1975 return (CmpValue == 0 || CmpValue == 1) &&
1976 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1977}
1978
1979/// Get the opcode of the S (flag-setting) version of Instr.
1980/// If Instr is already the S version, its opcode is returned.
1981/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S version
1982/// or we are not interested in it.
1983static unsigned sForm(MachineInstr &Instr) {
1984 switch (Instr.getOpcode()) {
1985 default:
1986 return AArch64::INSTRUCTION_LIST_END;
1987
1988 case AArch64::ADDSWrr:
1989 case AArch64::ADDSWri:
1990 case AArch64::ADDSXrr:
1991 case AArch64::ADDSXri:
1992 case AArch64::ADDSWrx:
1993 case AArch64::ADDSXrx:
1994 case AArch64::SUBSWrr:
1995 case AArch64::SUBSWri:
1996 case AArch64::SUBSWrx:
1997 case AArch64::SUBSXrr:
1998 case AArch64::SUBSXri:
1999 case AArch64::SUBSXrx:
2000 case AArch64::ANDSWri:
2001 case AArch64::ANDSWrr:
2002 case AArch64::ANDSWrs:
2003 case AArch64::ANDSXri:
2004 case AArch64::ANDSXrr:
2005 case AArch64::ANDSXrs:
2006 case AArch64::BICSWrr:
2007 case AArch64::BICSXrr:
2008 case AArch64::BICSWrs:
2009 case AArch64::BICSXrs:
2010 return Instr.getOpcode();
2011
2012 case AArch64::ADDWrr:
2013 return AArch64::ADDSWrr;
2014 case AArch64::ADDWri:
2015 return AArch64::ADDSWri;
2016 case AArch64::ADDXrr:
2017 return AArch64::ADDSXrr;
2018 case AArch64::ADDXri:
2019 return AArch64::ADDSXri;
2020 case AArch64::ADDWrx:
2021 return AArch64::ADDSWrx;
2022 case AArch64::ADDXrx:
2023 return AArch64::ADDSXrx;
2024 case AArch64::ADCWr:
2025 return AArch64::ADCSWr;
2026 case AArch64::ADCXr:
2027 return AArch64::ADCSXr;
2028 case AArch64::SUBWrr:
2029 return AArch64::SUBSWrr;
2030 case AArch64::SUBWri:
2031 return AArch64::SUBSWri;
2032 case AArch64::SUBXrr:
2033 return AArch64::SUBSXrr;
2034 case AArch64::SUBXri:
2035 return AArch64::SUBSXri;
2036 case AArch64::SUBWrx:
2037 return AArch64::SUBSWrx;
2038 case AArch64::SUBXrx:
2039 return AArch64::SUBSXrx;
2040 case AArch64::SBCWr:
2041 return AArch64::SBCSWr;
2042 case AArch64::SBCXr:
2043 return AArch64::SBCSXr;
2044 case AArch64::ANDWri:
2045 return AArch64::ANDSWri;
2046 case AArch64::ANDXri:
2047 return AArch64::ANDSXri;
2048 case AArch64::ANDWrr:
2049 return AArch64::ANDSWrr;
2050 case AArch64::ANDWrs:
2051 return AArch64::ANDSWrs;
2052 case AArch64::ANDXrr:
2053 return AArch64::ANDSXrr;
2054 case AArch64::ANDXrs:
2055 return AArch64::ANDSXrs;
2056 case AArch64::BICWrr:
2057 return AArch64::BICSWrr;
2058 case AArch64::BICXrr:
2059 return AArch64::BICSXrr;
2060 case AArch64::BICWrs:
2061 return AArch64::BICSWrs;
2062 case AArch64::BICXrs:
2063 return AArch64::BICSXrs;
2064 }
2065}
2066
2067/// Check if AArch64::NZCV should be alive in successors of MBB.
2068static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
2069 for (auto *BB : MBB->successors())
2070 if (BB->isLiveIn(AArch64::NZCV))
2071 return true;
2072 return false;
2073}
2074
2075/// \returns The condition code operand index for \p Instr if it is a branch
2076/// or select and -1 otherwise.
2077static int
2078findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
2079 switch (Instr.getOpcode()) {
2080 default:
2081 return -1;
2082
2083 case AArch64::Bcc: {
2084 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2085 assert(Idx >= 2);
2086 return Idx - 2;
2087 }
2088
2089 case AArch64::CSINVWr:
2090 case AArch64::CSINVXr:
2091 case AArch64::CSINCWr:
2092 case AArch64::CSINCXr:
2093 case AArch64::CSELWr:
2094 case AArch64::CSELXr:
2095 case AArch64::CSNEGWr:
2096 case AArch64::CSNEGXr:
2097 case AArch64::FCSELSrrr:
2098 case AArch64::FCSELDrrr: {
2099 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2100 assert(Idx >= 1);
2101 return Idx - 1;
2102 }
2103 }
2104}
2105
2106/// Find a condition code used by the instruction.
2107/// Returns AArch64CC::Invalid if either the instruction does not use condition
2108/// codes or we don't optimize CmpInstr in the presence of such instructions.
2111 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2112 Instr.getOperand(CCIdx).getImm())
2114}
2115
2118 UsedNZCV UsedFlags;
2119 switch (CC) {
2120 default:
2121 break;
2122
2123 case AArch64CC::EQ: // Z set
2124 case AArch64CC::NE: // Z clear
2125 UsedFlags.Z = true;
2126 break;
2127
2128 case AArch64CC::HI: // Z clear and C set
2129 case AArch64CC::LS: // Z set or C clear
2130 UsedFlags.Z = true;
2131 [[fallthrough]];
2132 case AArch64CC::HS: // C set
2133 case AArch64CC::LO: // C clear
2134 UsedFlags.C = true;
2135 break;
2136
2137 case AArch64CC::MI: // N set
2138 case AArch64CC::PL: // N clear
2139 UsedFlags.N = true;
2140 break;
2141
2142 case AArch64CC::VS: // V set
2143 case AArch64CC::VC: // V clear
2144 UsedFlags.V = true;
2145 break;
2146
2147 case AArch64CC::GT: // Z clear, N and V the same
2148 case AArch64CC::LE: // Z set, N and V differ
2149 UsedFlags.Z = true;
2150 [[fallthrough]];
2151 case AArch64CC::GE: // N and V the same
2152 case AArch64CC::LT: // N and V differ
2153 UsedFlags.N = true;
2154 UsedFlags.V = true;
2155 break;
2156 }
2157 return UsedFlags;
2158}
2159
2160/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
2161/// flags are not alive in successors of the common parent block of \p CmpInstr and \p MI.
2162/// \returns std::nullopt otherwise.
2163///
2164/// Collect instructions using those flags in \p CCUseInstrs if provided.
2165std::optional<UsedNZCV>
2167 const TargetRegisterInfo &TRI,
2168 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2169 MachineBasicBlock *CmpParent = CmpInstr.getParent();
2170 if (MI.getParent() != CmpParent)
2171 return std::nullopt;
2172
2173 if (areCFlagsAliveInSuccessors(CmpParent))
2174 return std::nullopt;
2175
2176 UsedNZCV NZCVUsedAfterCmp;
2178 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2179 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2181 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2182 return std::nullopt;
2183 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2184 if (CCUseInstrs)
2185 CCUseInstrs->push_back(&Instr);
2186 }
2187 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2188 break;
2189 }
2190 return NZCVUsedAfterCmp;
2191}
2192
2193static bool isADDSRegImm(unsigned Opcode) {
2194 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2195}
2196
2197static bool isSUBSRegImm(unsigned Opcode) {
2198 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2199}
2200
2202 unsigned Opc = sForm(MI);
2203 switch (Opc) {
2204 case AArch64::ANDSWri:
2205 case AArch64::ANDSWrr:
2206 case AArch64::ANDSWrs:
2207 case AArch64::ANDSXri:
2208 case AArch64::ANDSXrr:
2209 case AArch64::ANDSXrs:
2210 case AArch64::BICSWrr:
2211 case AArch64::BICSXrr:
2212 case AArch64::BICSWrs:
2213 case AArch64::BICSXrs:
2214 return true;
2215 default:
2216 return false;
2217 }
2218}
2219
2220/// Check if CmpInstr can be substituted by MI.
2221///
2222/// CmpInstr can be substituted:
2223/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2224/// - and, MI and CmpInstr are from the same MachineBB
2225/// - and, condition flags are not alive in successors of the CmpInstr parent
2226/// - and, if MI opcode is the S form there must be no defs of flags between
2227/// MI and CmpInstr
2228/// or if MI opcode is not the S form there must be neither defs of flags
2229/// nor uses of flags between MI and CmpInstr.
2230/// - and, the C flag is not used after CmpInstr
2231/// - and, the V flag is not used after CmpInstr, or MI produces a poison
2232///        value on signed overflow (no-signed-wrap) or always clears V (AND/BIC).
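///
/// For illustration (hypothetical registers):
/// \code
///   add  w8, w9, #1
///   ...                    ; no other NZCV defs or uses in between
///   cmp  w8, #0
///   b.eq target
/// \endcode
/// can become
/// \code
///   adds w8, w9, #1
///   b.eq target
/// \endcode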
2234 const TargetRegisterInfo &TRI) {
2235 // NOTE this assertion guarantees that MI.getOpcode() is an add, subtract or
2236 // logical (AND/BIC) operation that may or may not set flags.
2237 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2238
2239 const unsigned CmpOpcode = CmpInstr.getOpcode();
2240 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2241 return false;
2242
2243 assert((CmpInstr.getOperand(2).isImm() &&
2244 CmpInstr.getOperand(2).getImm() == 0) &&
2245 "Caller guarantees that CmpInstr compares with constant 0");
2246
2247 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2248 if (!NZVCUsed || NZVCUsed->C)
2249 return false;
2250
2251 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
2252 // '%vreg = add ...' or '%vreg = sub ...'.
2253 // Condition flag V is used to indicate signed overflow.
2254 // 1) MI and CmpInstr set N and V to the same value.
2255 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
2256 // signed overflow occurs, so CmpInstr could still be simplified away.
2257 // Note that Ands and Bics instructions always clear the V flag.
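// Illustrative case (hypothetical registers): for
//   add  w8, w9, #1        ; known no-signed-wrap
//   cmp  w8, #0
//   b.ge target            ; reads N and V
// the add may become 'adds': if no signed overflow occurs, ADDS sets V=0 and
// N to the sign of the result, matching the compare; if it did overflow, the
// no-signed-wrap flag makes the result poison, so the difference cannot be
// observed by a well-defined program.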
2258 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2259 return false;
2260
2261 AccessKind AccessToCheck = AK_Write;
2262 if (sForm(MI) != MI.getOpcode())
2263 AccessToCheck = AK_All;
2264 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2265}
2266
2267/// Substitute an instruction comparing to zero with another instruction
2268/// which produces needed condition flags.
2269///
2270/// Return true on success.
2271bool AArch64InstrInfo::substituteCmpToZero(
2272 MachineInstr &CmpInstr, unsigned SrcReg,
2273 const MachineRegisterInfo &MRI) const {
2274 // Get the unique definition of SrcReg.
2275 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2276 if (!MI)
2277 return false;
2278
2279 const TargetRegisterInfo &TRI = getRegisterInfo();
2280
2281 unsigned NewOpc = sForm(*MI);
2282 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2283 return false;
2284
2285 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2286 return false;
2287
2288 // Update the instruction to set NZCV.
2289 MI->setDesc(get(NewOpc));
2290 CmpInstr.eraseFromParent();
2291 bool succeeded = UpdateOperandRegClass(*MI);
2292 (void)succeeded;
2293 assert(succeeded && "Some operands reg class are incompatible!");
2294 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2295 return true;
2296}
2297
2298/// \returns True if \p CmpInstr can be removed.
2299///
2300/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2301/// codes used in \p CCUseInstrs must be inverted.
2303 int CmpValue, const TargetRegisterInfo &TRI,
2305 bool &IsInvertCC) {
2306 assert((CmpValue == 0 || CmpValue == 1) &&
2307 "Only comparisons to 0 or 1 considered for removal!");
2308
2309 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2310 unsigned MIOpc = MI.getOpcode();
2311 if (MIOpc == AArch64::CSINCWr) {
2312 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2313 MI.getOperand(2).getReg() != AArch64::WZR)
2314 return false;
2315 } else if (MIOpc == AArch64::CSINCXr) {
2316 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2317 MI.getOperand(2).getReg() != AArch64::XZR)
2318 return false;
2319 } else {
2320 return false;
2321 }
2323 if (MICC == AArch64CC::Invalid)
2324 return false;
2325
2326 // NZCV needs to be defined
2327 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2328 return false;
2329
2330 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2331 const unsigned CmpOpcode = CmpInstr.getOpcode();
2332 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2333 if (CmpValue && !IsSubsRegImm)
2334 return false;
2335 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2336 return false;
2337
2338 // MI conditions allowed: eq, ne, mi, pl
2339 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2340 if (MIUsedNZCV.C || MIUsedNZCV.V)
2341 return false;
2342
2343 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2344 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2345 // Condition flags are not used in CmpInstr basic block successors, and only
2346 // the Z or N flags are allowed to be used after CmpInstr within its basic block.
2347 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2348 return false;
2349 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2350 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2351 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2352 return false;
2353 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
2354 if (MIUsedNZCV.N && !CmpValue)
2355 return false;
2356
2357 // There must be no defs of flags between MI and CmpInstr
2358 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2359 return false;
2360
2361 // Condition code is inverted in the following cases:
2362 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2363 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2364 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2365 (!CmpValue && MICC == AArch64CC::NE);
2366 return true;
2367}
2368
2369/// Remove comparison in csinc-cmp sequence
2370///
2371/// Examples:
2372/// 1. \code
2373/// csinc w9, wzr, wzr, ne
2374/// cmp w9, #0
2375/// b.eq
2376/// \endcode
2377/// to
2378/// \code
2379/// csinc w9, wzr, wzr, ne
2380/// b.ne
2381/// \endcode
2382///
2383/// 2. \code
2384/// csinc x2, xzr, xzr, mi
2385/// cmp x2, #1
2386/// b.pl
2387/// \endcode
2388/// to
2389/// \code
2390/// csinc x2, xzr, xzr, mi
2391/// b.pl
2392/// \endcode
2393///
2394/// \param CmpInstr comparison instruction
2395/// \return True when comparison removed
2396bool AArch64InstrInfo::removeCmpToZeroOrOne(
2397 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2398 const MachineRegisterInfo &MRI) const {
2399 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2400 if (!MI)
2401 return false;
2402 const TargetRegisterInfo &TRI = getRegisterInfo();
2403 SmallVector<MachineInstr *, 4> CCUseInstrs;
2404 bool IsInvertCC = false;
2405 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2406 IsInvertCC))
2407 return false;
2408 // Make transformation
2409 CmpInstr.eraseFromParent();
2410 if (IsInvertCC) {
2411 // Invert condition codes in CmpInstr CC users
2412 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2413 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2414 assert(Idx >= 0 && "Unexpected instruction using CC.");
2415 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2417 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2418 CCOperand.setImm(CCUse);
2419 }
2420 }
2421 return true;
2422}
2423
2424bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2425 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2426 MI.getOpcode() != AArch64::CATCHRET)
2427 return false;
2428
2429 MachineBasicBlock &MBB = *MI.getParent();
2430 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2431 auto TRI = Subtarget.getRegisterInfo();
2432 DebugLoc DL = MI.getDebugLoc();
2433
2434 if (MI.getOpcode() == AArch64::CATCHRET) {
2435 // Skip to the first instruction before the epilog.
2436 const TargetInstrInfo *TII =
2438 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2440 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2441 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2442 FirstEpilogSEH != MBB.begin())
2443 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2444 if (FirstEpilogSEH != MBB.begin())
2445 FirstEpilogSEH = std::next(FirstEpilogSEH);
2446 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2447 .addReg(AArch64::X0, RegState::Define)
2448 .addMBB(TargetMBB);
2449 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2450 .addReg(AArch64::X0, RegState::Define)
2451 .addReg(AArch64::X0)
2452 .addMBB(TargetMBB)
2453 .addImm(0);
2454 TargetMBB->setMachineBlockAddressTaken();
2455 return true;
2456 }
2457
2458 Register Reg = MI.getOperand(0).getReg();
2460 if (M.getStackProtectorGuard() == "sysreg") {
2461 const AArch64SysReg::SysReg *SrcReg =
2462 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2463 if (!SrcReg)
2464 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2465
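// The overall expansion is roughly (a sketch; the system register and offset
// are illustrative and depend on the module's stack-protector-guard options):
//   mrs x0, SP_EL0          ; read the configured guard register
//   ldr x0, [x0, #40]       ; load the guard word at the configured offset
// using LDR, LDUR, or ADD/SUB followed by LDR, depending on which immediate
// form can encode the offset.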
2466 // mrs xN, sysreg
2467 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2469 .addImm(SrcReg->Encoding);
2470 int Offset = M.getStackProtectorGuardOffset();
2471 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2472 // ldr xN, [xN, #offset]
2473 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2474 .addDef(Reg)
2476 .addImm(Offset / 8);
2477 } else if (Offset >= -256 && Offset <= 255) {
2478 // ldur xN, [xN, #offset]
2479 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2480 .addDef(Reg)
2482 .addImm(Offset);
2483 } else if (Offset >= -4095 && Offset <= 4095) {
2484 if (Offset > 0) {
2485 // add xN, xN, #offset
2486 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2487 .addDef(Reg)
2489 .addImm(Offset)
2490 .addImm(0);
2491 } else {
2492 // sub xN, xN, #offset
2493 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2494 .addDef(Reg)
2496 .addImm(-Offset)
2497 .addImm(0);
2498 }
2499 // ldr xN, [xN]
2500 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2501 .addDef(Reg)
2503 .addImm(0);
2504 } else {
2505 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2506 // than 32760.
2507 // It might be nice to use AArch64::MOVi32imm here, which would get
2508 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2509 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2510 // AArch64FrameLowering might help us find such a scratch register
2511 // though. If we failed to find a scratch register, we could emit a
2512 // stream of add instructions to build up the immediate. Or, we could try
2513 // to insert a AArch64::MOVi32imm before register allocation so that we
2514 // didn't need to scavenge for a scratch register.
2515 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2516 }
2517 MBB.erase(MI);
2518 return true;
2519 }
2520
2521 const GlobalValue *GV =
2522 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2523 const TargetMachine &TM = MBB.getParent()->getTarget();
2524 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2525 const unsigned char MO_NC = AArch64II::MO_NC;
2526
2527 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2528 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2529 .addGlobalAddress(GV, 0, OpFlags);
2530 if (Subtarget.isTargetILP32()) {
2531 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2532 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2533 .addDef(Reg32, RegState::Dead)
2535 .addImm(0)
2536 .addMemOperand(*MI.memoperands_begin())
2538 } else {
2539 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2541 .addImm(0)
2542 .addMemOperand(*MI.memoperands_begin());
2543 }
2544 } else if (TM.getCodeModel() == CodeModel::Large) {
2545 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2546 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2547 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2548 .addImm(0);
2549 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2551 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2552 .addImm(16);
2553 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2555 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2556 .addImm(32);
2557 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2560 .addImm(48);
2561 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2563 .addImm(0)
2564 .addMemOperand(*MI.memoperands_begin());
2565 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2566 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2567 .addGlobalAddress(GV, 0, OpFlags);
2568 } else {
2569 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2570 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2571 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2572 if (Subtarget.isTargetILP32()) {
2573 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2574 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2575 .addDef(Reg32, RegState::Dead)
2577 .addGlobalAddress(GV, 0, LoFlags)
2578 .addMemOperand(*MI.memoperands_begin())
2580 } else {
2581 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2583 .addGlobalAddress(GV, 0, LoFlags)
2584 .addMemOperand(*MI.memoperands_begin());
2585 }
2586 }
2587
2588 MBB.erase(MI);
2589
2590 return true;
2591}
2592
2593// Return true if this instruction simply sets its single destination register
2594// to zero. This is equivalent to a register rename of the zero-register.
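// Examples that match (illustrative):
//   movz w0, #0             ; MOVZWi/MOVZXi with zero immediate and shift
//   and  w0, wzr, #0xff     ; ANDWri reading WZR
//   a COPY of WZR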
2596 switch (MI.getOpcode()) {
2597 default:
2598 break;
2599 case AArch64::MOVZWi:
2600 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2601 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2602 assert(MI.getDesc().getNumOperands() == 3 &&
2603 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2604 return true;
2605 }
2606 break;
2607 case AArch64::ANDWri: // and Rd, Rzr, #imm
2608 return MI.getOperand(1).getReg() == AArch64::WZR;
2609 case AArch64::ANDXri:
2610 return MI.getOperand(1).getReg() == AArch64::XZR;
2611 case TargetOpcode::COPY:
2612 return MI.getOperand(1).getReg() == AArch64::WZR;
2613 }
2614 return false;
2615}
2616
2617// Return true if this instruction simply renames a general register without
2618// modifying bits.
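// Examples that match (illustrative):
//   orr x0, xzr, x1         ; ORRXrs with XZR and LSL #0
//   add x0, x1, #0          ; ADDXri with a zero immediate and shift
//   a COPY between general-purpose registers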
2620 switch (MI.getOpcode()) {
2621 default:
2622 break;
2623 case TargetOpcode::COPY: {
2624 // GPR32 copies will be lowered to ORRXrs
2625 Register DstReg = MI.getOperand(0).getReg();
2626 return (AArch64::GPR32RegClass.contains(DstReg) ||
2627 AArch64::GPR64RegClass.contains(DstReg));
2628 }
2629 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2630 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2631 assert(MI.getDesc().getNumOperands() == 4 &&
2632 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2633 return true;
2634 }
2635 break;
2636 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2637 if (MI.getOperand(2).getImm() == 0) {
2638 assert(MI.getDesc().getNumOperands() == 4 &&
2639 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2640 return true;
2641 }
2642 break;
2643 }
2644 return false;
2645}
2646
2647// Return true if this instruction simply renames an FPR (vector) register
2648// without modifying bits.
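// Example that matches (illustrative):
//   mov v0.16b, v1.16b      ; the ORRv16i8 alias with identical sources
//   a COPY between FPR128 registers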
2650 switch (MI.getOpcode()) {
2651 default:
2652 break;
2653 case TargetOpcode::COPY: {
2654 Register DstReg = MI.getOperand(0).getReg();
2655 return AArch64::FPR128RegClass.contains(DstReg);
2656 }
2657 case AArch64::ORRv16i8:
2658 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2659 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2660 "invalid ORRv16i8 operands");
2661 return true;
2662 }
2663 break;
2664 }
2665 return false;
2666}
2667
2668static bool isFrameLoadOpcode(int Opcode) {
2669 switch (Opcode) {
2670 default:
2671 return false;
2672 case AArch64::LDRWui:
2673 case AArch64::LDRXui:
2674 case AArch64::LDRBui:
2675 case AArch64::LDRHui:
2676 case AArch64::LDRSui:
2677 case AArch64::LDRDui:
2678 case AArch64::LDRQui:
2679 case AArch64::LDR_PXI:
2680 return true;
2681 }
2682}
2683
2685 int &FrameIndex) const {
2686 if (!isFrameLoadOpcode(MI.getOpcode()))
2687 return Register();
2688
2689 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2690 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2691 FrameIndex = MI.getOperand(1).getIndex();
2692 return MI.getOperand(0).getReg();
2693 }
2694 return Register();
2695}
2696
2697static bool isFrameStoreOpcode(int Opcode) {
2698 switch (Opcode) {
2699 default:
2700 return false;
2701 case AArch64::STRWui:
2702 case AArch64::STRXui:
2703 case AArch64::STRBui:
2704 case AArch64::STRHui:
2705 case AArch64::STRSui:
2706 case AArch64::STRDui:
2707 case AArch64::STRQui:
2708 case AArch64::STR_PXI:
2709 return true;
2710 }
2711}
2712
2714 int &FrameIndex) const {
2715 if (!isFrameStoreOpcode(MI.getOpcode()))
2716 return Register();
2717
2718 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2719 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2720 FrameIndex = MI.getOperand(1).getIndex();
2721 return MI.getOperand(0).getReg();
2722 }
2723 return Register();
2724}
2725
2727 int &FrameIndex) const {
2728 if (!isFrameStoreOpcode(MI.getOpcode()))
2729 return Register();
2730
2731 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2732 return Reg;
2733
2735 if (hasStoreToStackSlot(MI, Accesses)) {
2736 if (Accesses.size() > 1)
2737 return Register();
2738
2739 FrameIndex =
2740 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2741 ->getFrameIndex();
2742 return MI.getOperand(0).getReg();
2743 }
2744 return Register();
2745}
2746
2748 int &FrameIndex) const {
2749 if (!isFrameLoadOpcode(MI.getOpcode()))
2750 return Register();
2751
2752 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2753 return Reg;
2754
2756 if (hasLoadFromStackSlot(MI, Accesses)) {
2757 if (Accesses.size() > 1)
2758 return Register();
2759
2760 FrameIndex =
2761 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2762 ->getFrameIndex();
2763 return MI.getOperand(0).getReg();
2764 }
2765 return Register();
2766}
2767
2768/// Check all MachineMemOperands for a hint to suppress pairing.
2770 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2771 return MMO->getFlags() & MOSuppressPair;
2772 });
2773}
2774
2775/// Set a flag on the first MachineMemOperand to suppress pairing.
2777 if (MI.memoperands_empty())
2778 return;
2779 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2780}
2781
2782/// Check all MachineMemOperands for a hint that the load/store is strided.
2784 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2785 return MMO->getFlags() & MOStridedAccess;
2786 });
2787}
2788
2790 switch (Opc) {
2791 default:
2792 return false;
2793 case AArch64::STURSi:
2794 case AArch64::STRSpre:
2795 case AArch64::STURDi:
2796 case AArch64::STRDpre:
2797 case AArch64::STURQi:
2798 case AArch64::STRQpre:
2799 case AArch64::STURBBi:
2800 case AArch64::STURHHi:
2801 case AArch64::STURWi:
2802 case AArch64::STRWpre:
2803 case AArch64::STURXi:
2804 case AArch64::STRXpre:
2805 case AArch64::LDURSi:
2806 case AArch64::LDRSpre:
2807 case AArch64::LDURDi:
2808 case AArch64::LDRDpre:
2809 case AArch64::LDURQi:
2810 case AArch64::LDRQpre:
2811 case AArch64::LDURWi:
2812 case AArch64::LDRWpre:
2813 case AArch64::LDURXi:
2814 case AArch64::LDRXpre:
2815 case AArch64::LDRSWpre:
2816 case AArch64::LDURSWi:
2817 case AArch64::LDURHHi:
2818 case AArch64::LDURBBi:
2819 case AArch64::LDURSBWi:
2820 case AArch64::LDURSHWi:
2821 return true;
2822 }
2823}
2824
2825std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2826 switch (Opc) {
2827 default: return {};
2828 case AArch64::PRFMui: return AArch64::PRFUMi;
2829 case AArch64::LDRXui: return AArch64::LDURXi;
2830 case AArch64::LDRWui: return AArch64::LDURWi;
2831 case AArch64::LDRBui: return AArch64::LDURBi;
2832 case AArch64::LDRHui: return AArch64::LDURHi;
2833 case AArch64::LDRSui: return AArch64::LDURSi;
2834 case AArch64::LDRDui: return AArch64::LDURDi;
2835 case AArch64::LDRQui: return AArch64::LDURQi;
2836 case AArch64::LDRBBui: return AArch64::LDURBBi;
2837 case AArch64::LDRHHui: return AArch64::LDURHHi;
2838 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2839 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2840 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2841 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2842 case AArch64::LDRSWui: return AArch64::LDURSWi;
2843 case AArch64::STRXui: return AArch64::STURXi;
2844 case AArch64::STRWui: return AArch64::STURWi;
2845 case AArch64::STRBui: return AArch64::STURBi;
2846 case AArch64::STRHui: return AArch64::STURHi;
2847 case AArch64::STRSui: return AArch64::STURSi;
2848 case AArch64::STRDui: return AArch64::STURDi;
2849 case AArch64::STRQui: return AArch64::STURQi;
2850 case AArch64::STRBBui: return AArch64::STURBBi;
2851 case AArch64::STRHHui: return AArch64::STURHHi;
2852 }
2853}
2854
2856 switch (Opc) {
2857 default:
2858 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2859 case AArch64::ADDG:
2860 case AArch64::LDAPURBi:
2861 case AArch64::LDAPURHi:
2862 case AArch64::LDAPURi:
2863 case AArch64::LDAPURSBWi:
2864 case AArch64::LDAPURSBXi:
2865 case AArch64::LDAPURSHWi:
2866 case AArch64::LDAPURSHXi:
2867 case AArch64::LDAPURSWi:
2868 case AArch64::LDAPURXi:
2869 case AArch64::LDR_PPXI:
2870 case AArch64::LDR_PXI:
2871 case AArch64::LDR_ZXI:
2872 case AArch64::LDR_ZZXI:
2873 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2874 case AArch64::LDR_ZZZXI:
2875 case AArch64::LDR_ZZZZXI:
2876 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2877 case AArch64::LDRBBui:
2878 case AArch64::LDRBui:
2879 case AArch64::LDRDui:
2880 case AArch64::LDRHHui:
2881 case AArch64::LDRHui:
2882 case AArch64::LDRQui:
2883 case AArch64::LDRSBWui:
2884 case AArch64::LDRSBXui:
2885 case AArch64::LDRSHWui:
2886 case AArch64::LDRSHXui:
2887 case AArch64::LDRSui:
2888 case AArch64::LDRSWui:
2889 case AArch64::LDRWui:
2890 case AArch64::LDRXui:
2891 case AArch64::LDURBBi:
2892 case AArch64::LDURBi:
2893 case AArch64::LDURDi:
2894 case AArch64::LDURHHi:
2895 case AArch64::LDURHi:
2896 case AArch64::LDURQi:
2897 case AArch64::LDURSBWi:
2898 case AArch64::LDURSBXi:
2899 case AArch64::LDURSHWi:
2900 case AArch64::LDURSHXi:
2901 case AArch64::LDURSi:
2902 case AArch64::LDURSWi:
2903 case AArch64::LDURWi:
2904 case AArch64::LDURXi:
2905 case AArch64::PRFMui:
2906 case AArch64::PRFUMi:
2907 case AArch64::ST2Gi:
2908 case AArch64::STGi:
2909 case AArch64::STLURBi:
2910 case AArch64::STLURHi:
2911 case AArch64::STLURWi:
2912 case AArch64::STLURXi:
2913 case AArch64::StoreSwiftAsyncContext:
2914 case AArch64::STR_PPXI:
2915 case AArch64::STR_PXI:
2916 case AArch64::STR_ZXI:
2917 case AArch64::STR_ZZXI:
2918 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2919 case AArch64::STR_ZZZXI:
2920 case AArch64::STR_ZZZZXI:
2921 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2922 case AArch64::STRBBui:
2923 case AArch64::STRBui:
2924 case AArch64::STRDui:
2925 case AArch64::STRHHui:
2926 case AArch64::STRHui:
2927 case AArch64::STRQui:
2928 case AArch64::STRSui:
2929 case AArch64::STRWui:
2930 case AArch64::STRXui:
2931 case AArch64::STURBBi:
2932 case AArch64::STURBi:
2933 case AArch64::STURDi:
2934 case AArch64::STURHHi:
2935 case AArch64::STURHi:
2936 case AArch64::STURQi:
2937 case AArch64::STURSi:
2938 case AArch64::STURWi:
2939 case AArch64::STURXi:
2940 case AArch64::STZ2Gi:
2941 case AArch64::STZGi:
2942 case AArch64::TAGPstack:
2943 return 2;
2944 case AArch64::LD1B_D_IMM:
2945 case AArch64::LD1B_H_IMM:
2946 case AArch64::LD1B_IMM:
2947 case AArch64::LD1B_S_IMM:
2948 case AArch64::LD1D_IMM:
2949 case AArch64::LD1H_D_IMM:
2950 case AArch64::LD1H_IMM:
2951 case AArch64::LD1H_S_IMM:
2952 case AArch64::LD1RB_D_IMM:
2953 case AArch64::LD1RB_H_IMM:
2954 case AArch64::LD1RB_IMM:
2955 case AArch64::LD1RB_S_IMM:
2956 case AArch64::LD1RD_IMM:
2957 case AArch64::LD1RH_D_IMM:
2958 case AArch64::LD1RH_IMM:
2959 case AArch64::LD1RH_S_IMM:
2960 case AArch64::LD1RSB_D_IMM:
2961 case AArch64::LD1RSB_H_IMM:
2962 case AArch64::LD1RSB_S_IMM:
2963 case AArch64::LD1RSH_D_IMM:
2964 case AArch64::LD1RSH_S_IMM:
2965 case AArch64::LD1RSW_IMM:
2966 case AArch64::LD1RW_D_IMM:
2967 case AArch64::LD1RW_IMM:
2968 case AArch64::LD1SB_D_IMM:
2969 case AArch64::LD1SB_H_IMM:
2970 case AArch64::LD1SB_S_IMM:
2971 case AArch64::LD1SH_D_IMM:
2972 case AArch64::LD1SH_S_IMM:
2973 case AArch64::LD1SW_D_IMM:
2974 case AArch64::LD1W_D_IMM:
2975 case AArch64::LD1W_IMM:
2976 case AArch64::LD2B_IMM:
2977 case AArch64::LD2D_IMM:
2978 case AArch64::LD2H_IMM:
2979 case AArch64::LD2W_IMM:
2980 case AArch64::LD3B_IMM:
2981 case AArch64::LD3D_IMM:
2982 case AArch64::LD3H_IMM:
2983 case AArch64::LD3W_IMM:
2984 case AArch64::LD4B_IMM:
2985 case AArch64::LD4D_IMM:
2986 case AArch64::LD4H_IMM:
2987 case AArch64::LD4W_IMM:
2988 case AArch64::LDG:
2989 case AArch64::LDNF1B_D_IMM:
2990 case AArch64::LDNF1B_H_IMM:
2991 case AArch64::LDNF1B_IMM:
2992 case AArch64::LDNF1B_S_IMM:
2993 case AArch64::LDNF1D_IMM:
2994 case AArch64::LDNF1H_D_IMM:
2995 case AArch64::LDNF1H_IMM:
2996 case AArch64::LDNF1H_S_IMM:
2997 case AArch64::LDNF1SB_D_IMM:
2998 case AArch64::LDNF1SB_H_IMM:
2999 case AArch64::LDNF1SB_S_IMM:
3000 case AArch64::LDNF1SH_D_IMM:
3001 case AArch64::LDNF1SH_S_IMM:
3002 case AArch64::LDNF1SW_D_IMM:
3003 case AArch64::LDNF1W_D_IMM:
3004 case AArch64::LDNF1W_IMM:
3005 case AArch64::LDNPDi:
3006 case AArch64::LDNPQi:
3007 case AArch64::LDNPSi:
3008 case AArch64::LDNPWi:
3009 case AArch64::LDNPXi:
3010 case AArch64::LDNT1B_ZRI:
3011 case AArch64::LDNT1D_ZRI:
3012 case AArch64::LDNT1H_ZRI:
3013 case AArch64::LDNT1W_ZRI:
3014 case AArch64::LDPDi:
3015 case AArch64::LDPQi:
3016 case AArch64::LDPSi:
3017 case AArch64::LDPWi:
3018 case AArch64::LDPXi:
3019 case AArch64::LDRBBpost:
3020 case AArch64::LDRBBpre:
3021 case AArch64::LDRBpost:
3022 case AArch64::LDRBpre:
3023 case AArch64::LDRDpost:
3024 case AArch64::LDRDpre:
3025 case AArch64::LDRHHpost:
3026 case AArch64::LDRHHpre:
3027 case AArch64::LDRHpost:
3028 case AArch64::LDRHpre:
3029 case AArch64::LDRQpost:
3030 case AArch64::LDRQpre:
3031 case AArch64::LDRSpost:
3032 case AArch64::LDRSpre:
3033 case AArch64::LDRWpost:
3034 case AArch64::LDRWpre:
3035 case AArch64::LDRXpost:
3036 case AArch64::LDRXpre:
3037 case AArch64::ST1B_D_IMM:
3038 case AArch64::ST1B_H_IMM:
3039 case AArch64::ST1B_IMM:
3040 case AArch64::ST1B_S_IMM:
3041 case AArch64::ST1D_IMM:
3042 case AArch64::ST1H_D_IMM:
3043 case AArch64::ST1H_IMM:
3044 case AArch64::ST1H_S_IMM:
3045 case AArch64::ST1W_D_IMM:
3046 case AArch64::ST1W_IMM:
3047 case AArch64::ST2B_IMM:
3048 case AArch64::ST2D_IMM:
3049 case AArch64::ST2H_IMM:
3050 case AArch64::ST2W_IMM:
3051 case AArch64::ST3B_IMM:
3052 case AArch64::ST3D_IMM:
3053 case AArch64::ST3H_IMM:
3054 case AArch64::ST3W_IMM:
3055 case AArch64::ST4B_IMM:
3056 case AArch64::ST4D_IMM:
3057 case AArch64::ST4H_IMM:
3058 case AArch64::ST4W_IMM:
3059 case AArch64::STGPi:
3060 case AArch64::STGPreIndex:
3061 case AArch64::STZGPreIndex:
3062 case AArch64::ST2GPreIndex:
3063 case AArch64::STZ2GPreIndex:
3064 case AArch64::STGPostIndex:
3065 case AArch64::STZGPostIndex:
3066 case AArch64::ST2GPostIndex:
3067 case AArch64::STZ2GPostIndex:
3068 case AArch64::STNPDi:
3069 case AArch64::STNPQi:
3070 case AArch64::STNPSi:
3071 case AArch64::STNPWi:
3072 case AArch64::STNPXi:
3073 case AArch64::STNT1B_ZRI:
3074 case AArch64::STNT1D_ZRI:
3075 case AArch64::STNT1H_ZRI:
3076 case AArch64::STNT1W_ZRI:
3077 case AArch64::STPDi:
3078 case AArch64::STPQi:
3079 case AArch64::STPSi:
3080 case AArch64::STPWi:
3081 case AArch64::STPXi:
3082 case AArch64::STRBBpost:
3083 case AArch64::STRBBpre:
3084 case AArch64::STRBpost:
3085 case AArch64::STRBpre:
3086 case AArch64::STRDpost:
3087 case AArch64::STRDpre:
3088 case AArch64::STRHHpost:
3089 case AArch64::STRHHpre:
3090 case AArch64::STRHpost:
3091 case AArch64::STRHpre:
3092 case AArch64::STRQpost:
3093 case AArch64::STRQpre:
3094 case AArch64::STRSpost:
3095 case AArch64::STRSpre:
3096 case AArch64::STRWpost:
3097 case AArch64::STRWpre:
3098 case AArch64::STRXpost:
3099 case AArch64::STRXpre:
3100 return 3;
3101 case AArch64::LDPDpost:
3102 case AArch64::LDPDpre:
3103 case AArch64::LDPQpost:
3104 case AArch64::LDPQpre:
3105 case AArch64::LDPSpost:
3106 case AArch64::LDPSpre:
3107 case AArch64::LDPWpost:
3108 case AArch64::LDPWpre:
3109 case AArch64::LDPXpost:
3110 case AArch64::LDPXpre:
3111 case AArch64::STGPpre:
3112 case AArch64::STGPpost:
3113 case AArch64::STPDpost:
3114 case AArch64::STPDpre:
3115 case AArch64::STPQpost:
3116 case AArch64::STPQpre:
3117 case AArch64::STPSpost:
3118 case AArch64::STPSpre:
3119 case AArch64::STPWpost:
3120 case AArch64::STPWpre:
3121 case AArch64::STPXpost:
3122 case AArch64::STPXpre:
3123 return 4;
3124 }
3125}
3126
3128 switch (MI.getOpcode()) {
3129 default:
3130 return false;
3131 // Scaled instructions.
3132 case AArch64::STRSui:
3133 case AArch64::STRDui:
3134 case AArch64::STRQui:
3135 case AArch64::STRXui:
3136 case AArch64::STRWui:
3137 case AArch64::LDRSui:
3138 case AArch64::LDRDui:
3139 case AArch64::LDRQui:
3140 case AArch64::LDRXui:
3141 case AArch64::LDRWui:
3142 case AArch64::LDRSWui:
3143 // Unscaled instructions.
3144 case AArch64::STURSi:
3145 case AArch64::STRSpre:
3146 case AArch64::STURDi:
3147 case AArch64::STRDpre:
3148 case AArch64::STURQi:
3149 case AArch64::STRQpre:
3150 case AArch64::STURWi:
3151 case AArch64::STRWpre:
3152 case AArch64::STURXi:
3153 case AArch64::STRXpre:
3154 case AArch64::LDURSi:
3155 case AArch64::LDRSpre:
3156 case AArch64::LDURDi:
3157 case AArch64::LDRDpre:
3158 case AArch64::LDURQi:
3159 case AArch64::LDRQpre:
3160 case AArch64::LDURWi:
3161 case AArch64::LDRWpre:
3162 case AArch64::LDURXi:
3163 case AArch64::LDRXpre:
3164 case AArch64::LDURSWi:
3165 case AArch64::LDRSWpre:
3166 // SVE instructions.
3167 case AArch64::LDR_ZXI:
3168 case AArch64::STR_ZXI:
3169 return true;
3170 }
3171}
3172
3174 switch (MI.getOpcode()) {
3175 default:
3176 assert((!MI.isCall() || !MI.isReturn()) &&
3177 "Unexpected instruction - was a new tail call opcode introduced?");
3178 return false;
3179 case AArch64::TCRETURNdi:
3180 case AArch64::TCRETURNri:
3181 case AArch64::TCRETURNrix16x17:
3182 case AArch64::TCRETURNrix17:
3183 case AArch64::TCRETURNrinotx16:
3184 case AArch64::TCRETURNriALL:
3185 case AArch64::AUTH_TCRETURN:
3186 case AArch64::AUTH_TCRETURN_BTI:
3187 return true;
3188 }
3189}
3190
3192 switch (Opc) {
3193 default:
3194 llvm_unreachable("Opcode has no flag setting equivalent!");
3195 // 32-bit cases:
3196 case AArch64::ADDWri:
3197 return AArch64::ADDSWri;
3198 case AArch64::ADDWrr:
3199 return AArch64::ADDSWrr;
3200 case AArch64::ADDWrs:
3201 return AArch64::ADDSWrs;
3202 case AArch64::ADDWrx:
3203 return AArch64::ADDSWrx;
3204 case AArch64::ANDWri:
3205 return AArch64::ANDSWri;
3206 case AArch64::ANDWrr:
3207 return AArch64::ANDSWrr;
3208 case AArch64::ANDWrs:
3209 return AArch64::ANDSWrs;
3210 case AArch64::BICWrr:
3211 return AArch64::BICSWrr;
3212 case AArch64::BICWrs:
3213 return AArch64::BICSWrs;
3214 case AArch64::SUBWri:
3215 return AArch64::SUBSWri;
3216 case AArch64::SUBWrr:
3217 return AArch64::SUBSWrr;
3218 case AArch64::SUBWrs:
3219 return AArch64::SUBSWrs;
3220 case AArch64::SUBWrx:
3221 return AArch64::SUBSWrx;
3222 // 64-bit cases:
3223 case AArch64::ADDXri:
3224 return AArch64::ADDSXri;
3225 case AArch64::ADDXrr:
3226 return AArch64::ADDSXrr;
3227 case AArch64::ADDXrs:
3228 return AArch64::ADDSXrs;
3229 case AArch64::ADDXrx:
3230 return AArch64::ADDSXrx;
3231 case AArch64::ANDXri:
3232 return AArch64::ANDSXri;
3233 case AArch64::ANDXrr:
3234 return AArch64::ANDSXrr;
3235 case AArch64::ANDXrs:
3236 return AArch64::ANDSXrs;
3237 case AArch64::BICXrr:
3238 return AArch64::BICSXrr;
3239 case AArch64::BICXrs:
3240 return AArch64::BICSXrs;
3241 case AArch64::SUBXri:
3242 return AArch64::SUBSXri;
3243 case AArch64::SUBXrr:
3244 return AArch64::SUBSXrr;
3245 case AArch64::SUBXrs:
3246 return AArch64::SUBSXrs;
3247 case AArch64::SUBXrx:
3248 return AArch64::SUBSXrx;
3249 // SVE instructions:
3250 case AArch64::AND_PPzPP:
3251 return AArch64::ANDS_PPzPP;
3252 case AArch64::BIC_PPzPP:
3253 return AArch64::BICS_PPzPP;
3254 case AArch64::EOR_PPzPP:
3255 return AArch64::EORS_PPzPP;
3256 case AArch64::NAND_PPzPP:
3257 return AArch64::NANDS_PPzPP;
3258 case AArch64::NOR_PPzPP:
3259 return AArch64::NORS_PPzPP;
3260 case AArch64::ORN_PPzPP:
3261 return AArch64::ORNS_PPzPP;
3262 case AArch64::ORR_PPzPP:
3263 return AArch64::ORRS_PPzPP;
3264 case AArch64::BRKA_PPzP:
3265 return AArch64::BRKAS_PPzP;
3266 case AArch64::BRKPA_PPzPP:
3267 return AArch64::BRKPAS_PPzPP;
3268 case AArch64::BRKB_PPzP:
3269 return AArch64::BRKBS_PPzP;
3270 case AArch64::BRKPB_PPzPP:
3271 return AArch64::BRKPBS_PPzPP;
3272 case AArch64::BRKN_PPzP:
3273 return AArch64::BRKNS_PPzP;
3274 case AArch64::RDFFR_PPz:
3275 return AArch64::RDFFRS_PPz;
3276 case AArch64::PTRUE_B:
3277 return AArch64::PTRUES_B;
3278 }
3279}
3280
3281// Is this a candidate for ld/st merging or pairing? For example, we don't
3282// touch volatiles or load/stores that have a hint to avoid pair formation.
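// For example (illustrative), two adjacent loads such as
//   ldr x0, [sp, #8]
//   ldr x1, [sp, #16]
// may later be combined by the load/store optimizer into
//   ldp x0, x1, [sp, #8]
// provided this hook accepts both instructions.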
3283bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
3284
3285 bool IsPreLdSt = isPreLdSt(MI);
3286
3287 // If this is a volatile load/store, don't mess with it.
3288 if (MI.hasOrderedMemoryRef())
3289 return false;
3290
3291 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3292 // For Pre-inc LD/ST, the operand is shifted by one.
3293 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3294 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3295 "Expected a reg or frame index operand.");
3296
3297 // For Pre-indexed addressing quadword instructions, the third operand is the
3298 // immediate value.
3299 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3300
3301 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3302 return false;
3303
3304 // Can't merge/pair if the instruction modifies the base register.
3305 // e.g., ldr x0, [x0]
3306 // This case will never occur with an FI base.
3307 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3308 // STR<S,D,Q,W,X>pre, it can be merged.
3309 // For example:
3310 // ldr q0, [x11, #32]!
3311 // ldr q1, [x11, #16]
3312 // to
3313 // ldp q0, q1, [x11, #32]!
3314 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3315 Register BaseReg = MI.getOperand(1).getReg();
3317 if (MI.modifiesRegister(BaseReg, TRI))
3318 return false;
3319 }
3320
3321 // Pairing SVE fills/spills is only valid for little-endian targets that
3322 // implement VLS 128.
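// For example (illustrative), with a fixed 128-bit vector length
//   str z0, [x0]
//   str z1, [x0, #1, mul vl]
// has the same memory layout as 'stp q0, q1, [x0]' on a little-endian target,
// since each Z-register spill then occupies exactly 16 bytes.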
3323 switch (MI.getOpcode()) {
3324 default:
3325 break;
3326 case AArch64::LDR_ZXI:
3327 case AArch64::STR_ZXI:
3328 if (!Subtarget.isLittleEndian() ||
3329 Subtarget.getSVEVectorSizeInBits() != 128)
3330 return false;
3331 }
3332
3333 // Check if this load/store has a hint to avoid pair formation.
3334 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3335 if (isLdStPairSuppressed(MI))
3336 return false;
3337
3338 // Do not pair any callee-save store/reload instructions in the
3339 // prologue/epilogue if the CFI information encoded the operations as separate
3340 // instructions, as that will cause the size of the actual prologue to mismatch
3341 // with the prologue size recorded in the Windows CFI.
3342 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3343 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3344 MI.getMF()->getFunction().needsUnwindTableEntry();
3345 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3346 MI.getFlag(MachineInstr::FrameDestroy)))
3347 return false;
3348
3349 // On some CPUs quad load/store pairs are slower than two single load/stores.
3350 if (Subtarget.isPaired128Slow()) {
3351 switch (MI.getOpcode()) {
3352 default:
3353 break;
3354 case AArch64::LDURQi:
3355 case AArch64::STURQi:
3356 case AArch64::LDRQui:
3357 case AArch64::STRQui:
3358 return false;
3359 }
3360 }
3361
3362 return true;
3363}
3364
3367 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3368 const TargetRegisterInfo *TRI) const {
3369 if (!LdSt.mayLoadOrStore())
3370 return false;
3371
3372 const MachineOperand *BaseOp;
3373 TypeSize WidthN(0, false);
3374 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3375 WidthN, TRI))
3376 return false;
3377 // The maximum vscale is 16 under AArch64, so return the maximal extent for
3378 // the vector.
3379 Width = LocationSize::precise(WidthN);
3380 BaseOps.push_back(BaseOp);
3381 return true;
3382}
3383
3384std::optional<ExtAddrMode>
3386 const TargetRegisterInfo *TRI) const {
3387 const MachineOperand *Base; // Filled with the base operand of MI.
3388 int64_t Offset; // Filled with the offset of MI.
3389 bool OffsetIsScalable;
3390 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3391 return std::nullopt;
3392
3393 if (!Base->isReg())
3394 return std::nullopt;
3395 ExtAddrMode AM;
3396 AM.BaseReg = Base->getReg();
3397 AM.Displacement = Offset;
3398 AM.ScaledReg = 0;
3399 AM.Scale = 0;
3400 return AM;
3401}
3402
3404 Register Reg,
3405 const MachineInstr &AddrI,
3406 ExtAddrMode &AM) const {
3407 // Filter out instructions into which we cannot fold.
3408 unsigned NumBytes;
3409 int64_t OffsetScale = 1;
3410 switch (MemI.getOpcode()) {
3411 default:
3412 return false;
3413
3414 case AArch64::LDURQi:
3415 case AArch64::STURQi:
3416 NumBytes = 16;
3417 break;
3418
3419 case AArch64::LDURDi:
3420 case AArch64::STURDi:
3421 case AArch64::LDURXi:
3422 case AArch64::STURXi:
3423 NumBytes = 8;
3424 break;
3425
3426 case AArch64::LDURWi:
3427 case AArch64::LDURSWi:
3428 case AArch64::STURWi:
3429 NumBytes = 4;
3430 break;
3431
3432 case AArch64::LDURHi:
3433 case AArch64::STURHi:
3434 case AArch64::LDURHHi:
3435 case AArch64::STURHHi:
3436 case AArch64::LDURSHXi:
3437 case AArch64::LDURSHWi:
3438 NumBytes = 2;
3439 break;
3440
3441 case AArch64::LDRBroX:
3442 case AArch64::LDRBBroX:
3443 case AArch64::LDRSBXroX:
3444 case AArch64::LDRSBWroX:
3445 case AArch64::STRBroX:
3446 case AArch64::STRBBroX:
3447 case AArch64::LDURBi:
3448 case AArch64::LDURBBi:
3449 case AArch64::LDURSBXi:
3450 case AArch64::LDURSBWi:
3451 case AArch64::STURBi:
3452 case AArch64::STURBBi:
3453 case AArch64::LDRBui:
3454 case AArch64::LDRBBui:
3455 case AArch64::LDRSBXui:
3456 case AArch64::LDRSBWui:
3457 case AArch64::STRBui:
3458 case AArch64::STRBBui:
3459 NumBytes = 1;
3460 break;
3461
3462 case AArch64::LDRQroX:
3463 case AArch64::STRQroX:
3464 case AArch64::LDRQui:
3465 case AArch64::STRQui:
3466 NumBytes = 16;
3467 OffsetScale = 16;
3468 break;
3469
3470 case AArch64::LDRDroX:
3471 case AArch64::STRDroX:
3472 case AArch64::LDRXroX:
3473 case AArch64::STRXroX:
3474 case AArch64::LDRDui:
3475 case AArch64::STRDui:
3476 case AArch64::LDRXui:
3477 case AArch64::STRXui:
3478 NumBytes = 8;
3479 OffsetScale = 8;
3480 break;
3481
3482 case AArch64::LDRWroX:
3483 case AArch64::LDRSWroX:
3484 case AArch64::STRWroX:
3485 case AArch64::LDRWui:
3486 case AArch64::LDRSWui:
3487 case AArch64::STRWui:
3488 NumBytes = 4;
3489 OffsetScale = 4;
3490 break;
3491
3492 case AArch64::LDRHroX:
3493 case AArch64::STRHroX:
3494 case AArch64::LDRHHroX:
3495 case AArch64::STRHHroX:
3496 case AArch64::LDRSHXroX:
3497 case AArch64::LDRSHWroX:
3498 case AArch64::LDRHui:
3499 case AArch64::STRHui:
3500 case AArch64::LDRHHui:
3501 case AArch64::STRHHui:
3502 case AArch64::LDRSHXui:
3503 case AArch64::LDRSHWui:
3504 NumBytes = 2;
3505 OffsetScale = 2;
3506 break;
3507 }
3508
3509 // Check the fold operand is not the loaded/stored value.
3510 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3511 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3512 return false;
3513
3514 // Handle memory instructions with a [Reg, Reg] addressing mode.
3515 if (MemI.getOperand(2).isReg()) {
3516 // Bail if the addressing mode already includes extension of the offset
3517 // register.
3518 if (MemI.getOperand(3).getImm())
3519 return false;
3520
3521 // Check if we actually have a scaled offset.
3522 if (MemI.getOperand(4).getImm() == 0)
3523 OffsetScale = 1;
3524
3525 // If the address instruction is folded into the base register, then the
3526 // addressing mode must not have a scale. Then we can swap the base and the
3527 // scaled registers.
3528 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3529 return false;
3530
3531 switch (AddrI.getOpcode()) {
3532 default:
3533 return false;
3534
3535 case AArch64::SBFMXri:
3536 // sxtw Xa, Wm
3537 // ldr Xd, [Xn, Xa, lsl #N]
3538 // ->
3539 // ldr Xd, [Xn, Wm, sxtw #N]
3540 if (AddrI.getOperand(2).getImm() != 0 ||
3541 AddrI.getOperand(3).getImm() != 31)
3542 return false;
3543
3544 AM.BaseReg = MemI.getOperand(1).getReg();
3545 if (AM.BaseReg == Reg)
3546 AM.BaseReg = MemI.getOperand(2).getReg();
3547 AM.ScaledReg = AddrI.getOperand(1).getReg();
3548 AM.Scale = OffsetScale;
3549 AM.Displacement = 0;
3551 return true;
3552
3553 case TargetOpcode::SUBREG_TO_REG: {
3554 // mov Wa, Wm
3555 // ldr Xd, [Xn, Xa, lsl #N]
3556 // ->
3557 // ldr Xd, [Xn, Wm, uxtw #N]
3558
3559 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3560 if (AddrI.getOperand(1).getImm() != 0 ||
3561 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3562 return false;
3563
3564 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3565 Register OffsetReg = AddrI.getOperand(2).getReg();
3566 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3567 return false;
3568
3569 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3570 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3571 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3572 DefMI.getOperand(3).getImm() != 0)
3573 return false;
3574
3575 AM.BaseReg = MemI.getOperand(1).getReg();
3576 if (AM.BaseReg == Reg)
3577 AM.BaseReg = MemI.getOperand(2).getReg();
3578 AM.ScaledReg = DefMI.getOperand(2).getReg();
3579 AM.Scale = OffsetScale;
3580 AM.Displacement = 0;
3582 return true;
3583 }
3584 }
3585 }
3586
3587 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3588
3589 // Check we are not breaking a potential conversion to an LDP.
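// (LDP/STP use a signed 7-bit immediate scaled by the access size, so e.g. an
// 8-byte access can be paired for offsets in [-64*8, 63*8] = [-512, 504]; the
// ranges below mirror that.)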
3590 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3591 int64_t NewOffset) -> bool {
3592 int64_t MinOffset, MaxOffset;
3593 switch (NumBytes) {
3594 default:
3595 return true;
3596 case 4:
3597 MinOffset = -256;
3598 MaxOffset = 252;
3599 break;
3600 case 8:
3601 MinOffset = -512;
3602 MaxOffset = 504;
3603 break;
3604 case 16:
3605 MinOffset = -1024;
3606 MaxOffset = 1008;
3607 break;
3608 }
3609 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3610 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3611 };
3612 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3613 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3614 int64_t NewOffset = OldOffset + Disp;
3615 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3616 return false;
3617 // If the old offset would fit into an LDP, but the new offset wouldn't,
3618 // bail out.
3619 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3620 return false;
3621 AM.BaseReg = AddrI.getOperand(1).getReg();
3622 AM.ScaledReg = 0;
3623 AM.Scale = 0;
3624 AM.Displacement = NewOffset;
3626 return true;
3627 };
3628
3629 auto canFoldAddRegIntoAddrMode =
3630 [&](int64_t Scale,
3632 if (MemI.getOperand(2).getImm() != 0)
3633 return false;
3634 if ((unsigned)Scale != Scale)
3635 return false;
3636 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3637 return false;
3638 AM.BaseReg = AddrI.getOperand(1).getReg();
3639 AM.ScaledReg = AddrI.getOperand(2).getReg();
3640 AM.Scale = Scale;
3641 AM.Displacement = 0;
3642 AM.Form = Form;
3643 return true;
3644 };
3645
3646 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3647 unsigned Opcode = MemI.getOpcode();
3648 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3649 Subtarget.isSTRQroSlow();
3650 };
3651
3652 int64_t Disp = 0;
3653 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3654 switch (AddrI.getOpcode()) {
3655 default:
3656 return false;
3657
3658 case AArch64::ADDXri:
3659 // add Xa, Xn, #N
3660 // ldr Xd, [Xa, #M]
3661 // ->
3662 // ldr Xd, [Xn, #N'+M]
3663 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3664 return canFoldAddSubImmIntoAddrMode(Disp);
3665
3666 case AArch64::SUBXri:
3667 // sub Xa, Xn, #N
3668 // ldr Xd, [Xa, #M]
3669 // ->
3670 // ldr Xd, [Xn, #N'+M]
3671 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3672 return canFoldAddSubImmIntoAddrMode(-Disp);
3673
3674 case AArch64::ADDXrs: {
3675 // add Xa, Xn, Xm, lsl #N
3676 // ldr Xd, [Xa]
3677 // ->
3678 // ldr Xd, [Xn, Xm, lsl #N]
3679
3680 // Don't fold the add if the result would be slower, unless optimising for
3681 // size.
3682 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3683 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3684 return false;
3685 Shift = AArch64_AM::getShiftValue(Shift);
3686 if (!OptSize) {
3687 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3688 return false;
3689 if (avoidSlowSTRQ(MemI))
3690 return false;
3691 }
3692 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3693 }
3694
3695 case AArch64::ADDXrr:
3696 // add Xa, Xn, Xm
3697 // ldr Xd, [Xa]
3698 // ->
3699 // ldr Xd, [Xn, Xm, lsl #0]
3700
3701 // Don't fold the add if the result would be slower, unless optimising for
3702 // size.
3703 if (!OptSize && avoidSlowSTRQ(MemI))
3704 return false;
3705 return canFoldAddRegIntoAddrMode(1);
3706
3707 case AArch64::ADDXrx:
3708 // add Xa, Xn, Wm, {s,u}xtw #N
3709 // ldr Xd, [Xa]
3710 // ->
3711 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3712
3713 // Don't fold the add if the result would be slower, unless optimising for
3714 // size.
3715 if (!OptSize && avoidSlowSTRQ(MemI))
3716 return false;
3717
3718 // Can fold only sign-/zero-extend of a word.
3719 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3720 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3721 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3722 return false;
3723
3724 return canFoldAddRegIntoAddrMode(
3725 1ULL << AArch64_AM::getArithShiftValue(Imm),
3726 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3727 : ExtAddrMode::Formula::ZExtScaledReg);
3728 }
3729}
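// Illustrative sketch, not part of the original source: the shape of the fold
// the address-folding hooks in this file implement. Register names are
// hypothetical.
//   add  x8, x0, #16          ; AddrI (ADDXri)
//   ldr  x9, [x8]             ; MemI  (LDRXui, offset 0)
// canFoldIntoAddrMode fills the ExtAddrMode as BaseReg = x0, ScaledReg = none,
// Scale = 0, Displacement = 16; the displacement fits the signed 9-bit
// unscaled range, so the pair can later be rewritten as
//   ldur x9, [x0, #16]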
3730
3731// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3732// return the opcode of an instruction performing the same operation, but using
3733// the [Reg, Reg] addressing mode.
3734static unsigned regOffsetOpcode(unsigned Opcode) {
3735 switch (Opcode) {
3736 default:
3737 llvm_unreachable("Address folding not implemented for instruction");
3738
3739 case AArch64::LDURQi:
3740 case AArch64::LDRQui:
3741 return AArch64::LDRQroX;
3742 case AArch64::STURQi:
3743 case AArch64::STRQui:
3744 return AArch64::STRQroX;
3745 case AArch64::LDURDi:
3746 case AArch64::LDRDui:
3747 return AArch64::LDRDroX;
3748 case AArch64::STURDi:
3749 case AArch64::STRDui:
3750 return AArch64::STRDroX;
3751 case AArch64::LDURXi:
3752 case AArch64::LDRXui:
3753 return AArch64::LDRXroX;
3754 case AArch64::STURXi:
3755 case AArch64::STRXui:
3756 return AArch64::STRXroX;
3757 case AArch64::LDURWi:
3758 case AArch64::LDRWui:
3759 return AArch64::LDRWroX;
3760 case AArch64::LDURSWi:
3761 case AArch64::LDRSWui:
3762 return AArch64::LDRSWroX;
3763 case AArch64::STURWi:
3764 case AArch64::STRWui:
3765 return AArch64::STRWroX;
3766 case AArch64::LDURHi:
3767 case AArch64::LDRHui:
3768 return AArch64::LDRHroX;
3769 case AArch64::STURHi:
3770 case AArch64::STRHui:
3771 return AArch64::STRHroX;
3772 case AArch64::LDURHHi:
3773 case AArch64::LDRHHui:
3774 return AArch64::LDRHHroX;
3775 case AArch64::STURHHi:
3776 case AArch64::STRHHui:
3777 return AArch64::STRHHroX;
3778 case AArch64::LDURSHXi:
3779 case AArch64::LDRSHXui:
3780 return AArch64::LDRSHXroX;
3781 case AArch64::LDURSHWi:
3782 case AArch64::LDRSHWui:
3783 return AArch64::LDRSHWroX;
3784 case AArch64::LDURBi:
3785 case AArch64::LDRBui:
3786 return AArch64::LDRBroX;
3787 case AArch64::LDURBBi:
3788 case AArch64::LDRBBui:
3789 return AArch64::LDRBBroX;
3790 case AArch64::LDURSBXi:
3791 case AArch64::LDRSBXui:
3792 return AArch64::LDRSBXroX;
3793 case AArch64::LDURSBWi:
3794 case AArch64::LDRSBWui:
3795 return AArch64::LDRSBWroX;
3796 case AArch64::STURBi:
3797 case AArch64::STRBui:
3798 return AArch64::STRBroX;
3799 case AArch64::STURBBi:
3800 case AArch64::STRBBui:
3801 return AArch64::STRBBroX;
3802 }
3803}
3804
3805// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3806// the opcode of an instruction performing the same operation, but using the
3807// [Reg, #Imm] addressing mode with scaled offset.
3808unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3809 switch (Opcode) {
3810 default:
3811 llvm_unreachable("Address folding not implemented for instruction");
3812
3813 case AArch64::LDURQi:
3814 Scale = 16;
3815 return AArch64::LDRQui;
3816 case AArch64::STURQi:
3817 Scale = 16;
3818 return AArch64::STRQui;
3819 case AArch64::LDURDi:
3820 Scale = 8;
3821 return AArch64::LDRDui;
3822 case AArch64::STURDi:
3823 Scale = 8;
3824 return AArch64::STRDui;
3825 case AArch64::LDURXi:
3826 Scale = 8;
3827 return AArch64::LDRXui;
3828 case AArch64::STURXi:
3829 Scale = 8;
3830 return AArch64::STRXui;
3831 case AArch64::LDURWi:
3832 Scale = 4;
3833 return AArch64::LDRWui;
3834 case AArch64::LDURSWi:
3835 Scale = 4;
3836 return AArch64::LDRSWui;
3837 case AArch64::STURWi:
3838 Scale = 4;
3839 return AArch64::STRWui;
3840 case AArch64::LDURHi:
3841 Scale = 2;
3842 return AArch64::LDRHui;
3843 case AArch64::STURHi:
3844 Scale = 2;
3845 return AArch64::STRHui;
3846 case AArch64::LDURHHi:
3847 Scale = 2;
3848 return AArch64::LDRHHui;
3849 case AArch64::STURHHi:
3850 Scale = 2;
3851 return AArch64::STRHHui;
3852 case AArch64::LDURSHXi:
3853 Scale = 2;
3854 return AArch64::LDRSHXui;
3855 case AArch64::LDURSHWi:
3856 Scale = 2;
3857 return AArch64::LDRSHWui;
3858 case AArch64::LDURBi:
3859 Scale = 1;
3860 return AArch64::LDRBui;
3861 case AArch64::LDURBBi:
3862 Scale = 1;
3863 return AArch64::LDRBBui;
3864 case AArch64::LDURSBXi:
3865 Scale = 1;
3866 return AArch64::LDRSBXui;
3867 case AArch64::LDURSBWi:
3868 Scale = 1;
3869 return AArch64::LDRSBWui;
3870 case AArch64::STURBi:
3871 Scale = 1;
3872 return AArch64::STRBui;
3873 case AArch64::STURBBi:
3874 Scale = 1;
3875 return AArch64::STRBBui;
3876 case AArch64::LDRQui:
3877 case AArch64::STRQui:
3878 Scale = 16;
3879 return Opcode;
3880 case AArch64::LDRDui:
3881 case AArch64::STRDui:
3882 case AArch64::LDRXui:
3883 case AArch64::STRXui:
3884 Scale = 8;
3885 return Opcode;
3886 case AArch64::LDRWui:
3887 case AArch64::LDRSWui:
3888 case AArch64::STRWui:
3889 Scale = 4;
3890 return Opcode;
3891 case AArch64::LDRHui:
3892 case AArch64::STRHui:
3893 case AArch64::LDRHHui:
3894 case AArch64::STRHHui:
3895 case AArch64::LDRSHXui:
3896 case AArch64::LDRSHWui:
3897 Scale = 2;
3898 return Opcode;
3899 case AArch64::LDRBui:
3900 case AArch64::LDRBBui:
3901 case AArch64::LDRSBXui:
3902 case AArch64::LDRSBWui:
3903 case AArch64::STRBui:
3904 case AArch64::STRBBui:
3905 Scale = 1;
3906 return Opcode;
3907 }
3908}
3909
3910// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3911// the opcode of an instruction performing the same operation, but using the
3912// [Reg, #Imm] addressing mode with unscaled offset.
3913unsigned unscaledOffsetOpcode(unsigned Opcode) {
3914 switch (Opcode) {
3915 default:
3916 llvm_unreachable("Address folding not implemented for instruction");
3917
3918 case AArch64::LDURQi:
3919 case AArch64::STURQi:
3920 case AArch64::LDURDi:
3921 case AArch64::STURDi:
3922 case AArch64::LDURXi:
3923 case AArch64::STURXi:
3924 case AArch64::LDURWi:
3925 case AArch64::LDURSWi:
3926 case AArch64::STURWi:
3927 case AArch64::LDURHi:
3928 case AArch64::STURHi:
3929 case AArch64::LDURHHi:
3930 case AArch64::STURHHi:
3931 case AArch64::LDURSHXi:
3932 case AArch64::LDURSHWi:
3933 case AArch64::LDURBi:
3934 case AArch64::STURBi:
3935 case AArch64::LDURBBi:
3936 case AArch64::STURBBi:
3937 case AArch64::LDURSBWi:
3938 case AArch64::LDURSBXi:
3939 return Opcode;
3940 case AArch64::LDRQui:
3941 return AArch64::LDURQi;
3942 case AArch64::STRQui:
3943 return AArch64::STURQi;
3944 case AArch64::LDRDui:
3945 return AArch64::LDURDi;
3946 case AArch64::STRDui:
3947 return AArch64::STURDi;
3948 case AArch64::LDRXui:
3949 return AArch64::LDURXi;
3950 case AArch64::STRXui:
3951 return AArch64::STURXi;
3952 case AArch64::LDRWui:
3953 return AArch64::LDURWi;
3954 case AArch64::LDRSWui:
3955 return AArch64::LDURSWi;
3956 case AArch64::STRWui:
3957 return AArch64::STURWi;
3958 case AArch64::LDRHui:
3959 return AArch64::LDURHi;
3960 case AArch64::STRHui:
3961 return AArch64::STURHi;
3962 case AArch64::LDRHHui:
3963 return AArch64::LDURHHi;
3964 case AArch64::STRHHui:
3965 return AArch64::STURHHi;
3966 case AArch64::LDRSHXui:
3967 return AArch64::LDURSHXi;
3968 case AArch64::LDRSHWui:
3969 return AArch64::LDURSHWi;
3970 case AArch64::LDRBBui:
3971 return AArch64::LDURBBi;
3972 case AArch64::LDRBui:
3973 return AArch64::LDURBi;
3974 case AArch64::STRBBui:
3975 return AArch64::STURBBi;
3976 case AArch64::STRBui:
3977 return AArch64::STURBi;
3978 case AArch64::LDRSBWui:
3979 return AArch64::LDURSBWi;
3980 case AArch64::LDRSBXui:
3981 return AArch64::LDURSBXi;
3982 }
3983}
3984
3985// Given the opcode of a memory load/store instruction, return the opcode of an
3986// instruction performing the same operation, but using
3987// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3988// offset register.
3989static unsigned offsetExtendOpcode(unsigned Opcode) {
3990 switch (Opcode) {
3991 default:
3992 llvm_unreachable("Address folding not implemented for instruction");
3993
3994 case AArch64::LDRQroX:
3995 case AArch64::LDURQi:
3996 case AArch64::LDRQui:
3997 return AArch64::LDRQroW;
3998 case AArch64::STRQroX:
3999 case AArch64::STURQi:
4000 case AArch64::STRQui:
4001 return AArch64::STRQroW;
4002 case AArch64::LDRDroX:
4003 case AArch64::LDURDi:
4004 case AArch64::LDRDui:
4005 return AArch64::LDRDroW;
4006 case AArch64::STRDroX:
4007 case AArch64::STURDi:
4008 case AArch64::STRDui:
4009 return AArch64::STRDroW;
4010 case AArch64::LDRXroX:
4011 case AArch64::LDURXi:
4012 case AArch64::LDRXui:
4013 return AArch64::LDRXroW;
4014 case AArch64::STRXroX:
4015 case AArch64::STURXi:
4016 case AArch64::STRXui:
4017 return AArch64::STRXroW;
4018 case AArch64::LDRWroX:
4019 case AArch64::LDURWi:
4020 case AArch64::LDRWui:
4021 return AArch64::LDRWroW;
4022 case AArch64::LDRSWroX:
4023 case AArch64::LDURSWi:
4024 case AArch64::LDRSWui:
4025 return AArch64::LDRSWroW;
4026 case AArch64::STRWroX:
4027 case AArch64::STURWi:
4028 case AArch64::STRWui:
4029 return AArch64::STRWroW;
4030 case AArch64::LDRHroX:
4031 case AArch64::LDURHi:
4032 case AArch64::LDRHui:
4033 return AArch64::LDRHroW;
4034 case AArch64::STRHroX:
4035 case AArch64::STURHi:
4036 case AArch64::STRHui:
4037 return AArch64::STRHroW;
4038 case AArch64::LDRHHroX:
4039 case AArch64::LDURHHi:
4040 case AArch64::LDRHHui:
4041 return AArch64::LDRHHroW;
4042 case AArch64::STRHHroX:
4043 case AArch64::STURHHi:
4044 case AArch64::STRHHui:
4045 return AArch64::STRHHroW;
4046 case AArch64::LDRSHXroX:
4047 case AArch64::LDURSHXi:
4048 case AArch64::LDRSHXui:
4049 return AArch64::LDRSHXroW;
4050 case AArch64::LDRSHWroX:
4051 case AArch64::LDURSHWi:
4052 case AArch64::LDRSHWui:
4053 return AArch64::LDRSHWroW;
4054 case AArch64::LDRBroX:
4055 case AArch64::LDURBi:
4056 case AArch64::LDRBui:
4057 return AArch64::LDRBroW;
4058 case AArch64::LDRBBroX:
4059 case AArch64::LDURBBi:
4060 case AArch64::LDRBBui:
4061 return AArch64::LDRBBroW;
4062 case AArch64::LDRSBXroX:
4063 case AArch64::LDURSBXi:
4064 case AArch64::LDRSBXui:
4065 return AArch64::LDRSBXroW;
4066 case AArch64::LDRSBWroX:
4067 case AArch64::LDURSBWi:
4068 case AArch64::LDRSBWui:
4069 return AArch64::LDRSBWroW;
4070 case AArch64::STRBroX:
4071 case AArch64::STURBi:
4072 case AArch64::STRBui:
4073 return AArch64::STRBroW;
4074 case AArch64::STRBBroX:
4075 case AArch64::STURBBi:
4076 case AArch64::STRBBui:
4077 return AArch64::STRBBroW;
4078 }
4079}
4080
4081MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
4082 const ExtAddrMode &AM) const {
4083
4084 const DebugLoc &DL = MemI.getDebugLoc();
4085 MachineBasicBlock &MBB = *MemI.getParent();
4086 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
4087
4088 if (AM.Form == ExtAddrMode::Formula::Basic) {
4089 if (AM.ScaledReg) {
4090 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4091 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4092 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4093 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4094 .addReg(MemI.getOperand(0).getReg(),
4095 getDefRegState(MemI.mayLoad()))
4096 .addReg(AM.BaseReg)
4097 .addReg(AM.ScaledReg)
4098 .addImm(0)
4099 .addImm(AM.Scale > 1)
4100 .setMemRefs(MemI.memoperands())
4101 .setMIFlags(MemI.getFlags());
4102 return B.getInstr();
4103 }
4104
4105 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4106 "Addressing mode not supported for folding");
4107
4108 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
4109 unsigned Scale = 1;
4110 unsigned Opcode = MemI.getOpcode();
4111 if (isInt<9>(AM.Displacement))
4112 Opcode = unscaledOffsetOpcode(Opcode);
4113 else
4114 Opcode = scaledOffsetOpcode(Opcode, Scale);
4115
4116 auto B =
4117 BuildMI(MBB, MemI, DL, get(Opcode))
4118 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4119 .addReg(AM.BaseReg)
4120 .addImm(AM.Displacement / Scale)
4121 .setMemRefs(MemI.memoperands())
4122 .setMIFlags(MemI.getFlags());
4123 return B.getInstr();
4124 }
4125
4126 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
4127 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
4128 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4129 assert(AM.ScaledReg && !AM.Displacement &&
4130 "Address offset can be a register or an immediate, but not both");
4131 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4132 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4133 // Make sure the offset register is in the correct register class.
4134 Register OffsetReg = AM.ScaledReg;
4135 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4136 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4137 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4138 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4139 .addReg(AM.ScaledReg, {}, AArch64::sub_32);
4140 }
4141 auto B =
4142 BuildMI(MBB, MemI, DL, get(Opcode))
4143 .addReg(MemI.getOperand(0).getReg(), getDefRegState(MemI.mayLoad()))
4144 .addReg(AM.BaseReg)
4145 .addReg(OffsetReg)
4146 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
4147 .addImm(AM.Scale != 1)
4148 .setMemRefs(MemI.memoperands())
4149 .setMIFlags(MemI.getFlags());
4150
4151 return B.getInstr();
4152 }
4153
4154 llvm_unreachable(
4155 "Function must not be called with an addressing mode it can't handle");
4156}
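// Illustrative sketch, not from the source: continuing the example above, a
// register-offset ExtAddrMode of { BaseReg = x0, ScaledReg = x2, Scale = 8,
// Displacement = 0 } for an LDRXui is materialised through regOffsetOpcode as
//   ldr x9, [x0, x2, lsl #3]
// whereas a pure-displacement mode picks the unscaled [Reg, #Imm] encoding
// when the displacement fits a signed 9-bit range and the scaled one otherwise.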
4157
4158/// Return true if the opcode is a post-index ld/st instruction, which really
4159/// loads from base+0.
4160static bool isPostIndexLdStOpcode(unsigned Opcode) {
4161 switch (Opcode) {
4162 default:
4163 return false;
4164 case AArch64::LD1Fourv16b_POST:
4165 case AArch64::LD1Fourv1d_POST:
4166 case AArch64::LD1Fourv2d_POST:
4167 case AArch64::LD1Fourv2s_POST:
4168 case AArch64::LD1Fourv4h_POST:
4169 case AArch64::LD1Fourv4s_POST:
4170 case AArch64::LD1Fourv8b_POST:
4171 case AArch64::LD1Fourv8h_POST:
4172 case AArch64::LD1Onev16b_POST:
4173 case AArch64::LD1Onev1d_POST:
4174 case AArch64::LD1Onev2d_POST:
4175 case AArch64::LD1Onev2s_POST:
4176 case AArch64::LD1Onev4h_POST:
4177 case AArch64::LD1Onev4s_POST:
4178 case AArch64::LD1Onev8b_POST:
4179 case AArch64::LD1Onev8h_POST:
4180 case AArch64::LD1Rv16b_POST:
4181 case AArch64::LD1Rv1d_POST:
4182 case AArch64::LD1Rv2d_POST:
4183 case AArch64::LD1Rv2s_POST:
4184 case AArch64::LD1Rv4h_POST:
4185 case AArch64::LD1Rv4s_POST:
4186 case AArch64::LD1Rv8b_POST:
4187 case AArch64::LD1Rv8h_POST:
4188 case AArch64::LD1Threev16b_POST:
4189 case AArch64::LD1Threev1d_POST:
4190 case AArch64::LD1Threev2d_POST:
4191 case AArch64::LD1Threev2s_POST:
4192 case AArch64::LD1Threev4h_POST:
4193 case AArch64::LD1Threev4s_POST:
4194 case AArch64::LD1Threev8b_POST:
4195 case AArch64::LD1Threev8h_POST:
4196 case AArch64::LD1Twov16b_POST:
4197 case AArch64::LD1Twov1d_POST:
4198 case AArch64::LD1Twov2d_POST:
4199 case AArch64::LD1Twov2s_POST:
4200 case AArch64::LD1Twov4h_POST:
4201 case AArch64::LD1Twov4s_POST:
4202 case AArch64::LD1Twov8b_POST:
4203 case AArch64::LD1Twov8h_POST:
4204 case AArch64::LD1i16_POST:
4205 case AArch64::LD1i32_POST:
4206 case AArch64::LD1i64_POST:
4207 case AArch64::LD1i8_POST:
4208 case AArch64::LD2Rv16b_POST:
4209 case AArch64::LD2Rv1d_POST:
4210 case AArch64::LD2Rv2d_POST:
4211 case AArch64::LD2Rv2s_POST:
4212 case AArch64::LD2Rv4h_POST:
4213 case AArch64::LD2Rv4s_POST:
4214 case AArch64::LD2Rv8b_POST:
4215 case AArch64::LD2Rv8h_POST:
4216 case AArch64::LD2Twov16b_POST:
4217 case AArch64::LD2Twov2d_POST:
4218 case AArch64::LD2Twov2s_POST:
4219 case AArch64::LD2Twov4h_POST:
4220 case AArch64::LD2Twov4s_POST:
4221 case AArch64::LD2Twov8b_POST:
4222 case AArch64::LD2Twov8h_POST:
4223 case AArch64::LD2i16_POST:
4224 case AArch64::LD2i32_POST:
4225 case AArch64::LD2i64_POST:
4226 case AArch64::LD2i8_POST:
4227 case AArch64::LD3Rv16b_POST:
4228 case AArch64::LD3Rv1d_POST:
4229 case AArch64::LD3Rv2d_POST:
4230 case AArch64::LD3Rv2s_POST:
4231 case AArch64::LD3Rv4h_POST:
4232 case AArch64::LD3Rv4s_POST:
4233 case AArch64::LD3Rv8b_POST:
4234 case AArch64::LD3Rv8h_POST:
4235 case AArch64::LD3Threev16b_POST:
4236 case AArch64::LD3Threev2d_POST:
4237 case AArch64::LD3Threev2s_POST:
4238 case AArch64::LD3Threev4h_POST:
4239 case AArch64::LD3Threev4s_POST:
4240 case AArch64::LD3Threev8b_POST:
4241 case AArch64::LD3Threev8h_POST:
4242 case AArch64::LD3i16_POST:
4243 case AArch64::LD3i32_POST:
4244 case AArch64::LD3i64_POST:
4245 case AArch64::LD3i8_POST:
4246 case AArch64::LD4Fourv16b_POST:
4247 case AArch64::LD4Fourv2d_POST:
4248 case AArch64::LD4Fourv2s_POST:
4249 case AArch64::LD4Fourv4h_POST:
4250 case AArch64::LD4Fourv4s_POST:
4251 case AArch64::LD4Fourv8b_POST:
4252 case AArch64::LD4Fourv8h_POST:
4253 case AArch64::LD4Rv16b_POST:
4254 case AArch64::LD4Rv1d_POST:
4255 case AArch64::LD4Rv2d_POST:
4256 case AArch64::LD4Rv2s_POST:
4257 case AArch64::LD4Rv4h_POST:
4258 case AArch64::LD4Rv4s_POST:
4259 case AArch64::LD4Rv8b_POST:
4260 case AArch64::LD4Rv8h_POST:
4261 case AArch64::LD4i16_POST:
4262 case AArch64::LD4i32_POST:
4263 case AArch64::LD4i64_POST:
4264 case AArch64::LD4i8_POST:
4265 case AArch64::LDAPRWpost:
4266 case AArch64::LDAPRXpost:
4267 case AArch64::LDIAPPWpost:
4268 case AArch64::LDIAPPXpost:
4269 case AArch64::LDPDpost:
4270 case AArch64::LDPQpost:
4271 case AArch64::LDPSWpost:
4272 case AArch64::LDPSpost:
4273 case AArch64::LDPWpost:
4274 case AArch64::LDPXpost:
4275 case AArch64::LDRBBpost:
4276 case AArch64::LDRBpost:
4277 case AArch64::LDRDpost:
4278 case AArch64::LDRHHpost:
4279 case AArch64::LDRHpost:
4280 case AArch64::LDRQpost:
4281 case AArch64::LDRSBWpost:
4282 case AArch64::LDRSBXpost:
4283 case AArch64::LDRSHWpost:
4284 case AArch64::LDRSHXpost:
4285 case AArch64::LDRSWpost:
4286 case AArch64::LDRSpost:
4287 case AArch64::LDRWpost:
4288 case AArch64::LDRXpost:
4289 case AArch64::ST1Fourv16b_POST:
4290 case AArch64::ST1Fourv1d_POST:
4291 case AArch64::ST1Fourv2d_POST:
4292 case AArch64::ST1Fourv2s_POST:
4293 case AArch64::ST1Fourv4h_POST:
4294 case AArch64::ST1Fourv4s_POST:
4295 case AArch64::ST1Fourv8b_POST:
4296 case AArch64::ST1Fourv8h_POST:
4297 case AArch64::ST1Onev16b_POST:
4298 case AArch64::ST1Onev1d_POST:
4299 case AArch64::ST1Onev2d_POST:
4300 case AArch64::ST1Onev2s_POST:
4301 case AArch64::ST1Onev4h_POST:
4302 case AArch64::ST1Onev4s_POST:
4303 case AArch64::ST1Onev8b_POST:
4304 case AArch64::ST1Onev8h_POST:
4305 case AArch64::ST1Threev16b_POST:
4306 case AArch64::ST1Threev1d_POST:
4307 case AArch64::ST1Threev2d_POST:
4308 case AArch64::ST1Threev2s_POST:
4309 case AArch64::ST1Threev4h_POST:
4310 case AArch64::ST1Threev4s_POST:
4311 case AArch64::ST1Threev8b_POST:
4312 case AArch64::ST1Threev8h_POST:
4313 case AArch64::ST1Twov16b_POST:
4314 case AArch64::ST1Twov1d_POST:
4315 case AArch64::ST1Twov2d_POST:
4316 case AArch64::ST1Twov2s_POST:
4317 case AArch64::ST1Twov4h_POST:
4318 case AArch64::ST1Twov4s_POST:
4319 case AArch64::ST1Twov8b_POST:
4320 case AArch64::ST1Twov8h_POST:
4321 case AArch64::ST1i16_POST:
4322 case AArch64::ST1i32_POST:
4323 case AArch64::ST1i64_POST:
4324 case AArch64::ST1i8_POST:
4325 case AArch64::ST2GPostIndex:
4326 case AArch64::ST2Twov16b_POST:
4327 case AArch64::ST2Twov2d_POST:
4328 case AArch64::ST2Twov2s_POST:
4329 case AArch64::ST2Twov4h_POST:
4330 case AArch64::ST2Twov4s_POST:
4331 case AArch64::ST2Twov8b_POST:
4332 case AArch64::ST2Twov8h_POST:
4333 case AArch64::ST2i16_POST:
4334 case AArch64::ST2i32_POST:
4335 case AArch64::ST2i64_POST:
4336 case AArch64::ST2i8_POST:
4337 case AArch64::ST3Threev16b_POST:
4338 case AArch64::ST3Threev2d_POST:
4339 case AArch64::ST3Threev2s_POST:
4340 case AArch64::ST3Threev4h_POST:
4341 case AArch64::ST3Threev4s_POST:
4342 case AArch64::ST3Threev8b_POST:
4343 case AArch64::ST3Threev8h_POST:
4344 case AArch64::ST3i16_POST:
4345 case AArch64::ST3i32_POST:
4346 case AArch64::ST3i64_POST:
4347 case AArch64::ST3i8_POST:
4348 case AArch64::ST4Fourv16b_POST:
4349 case AArch64::ST4Fourv2d_POST:
4350 case AArch64::ST4Fourv2s_POST:
4351 case AArch64::ST4Fourv4h_POST:
4352 case AArch64::ST4Fourv4s_POST:
4353 case AArch64::ST4Fourv8b_POST:
4354 case AArch64::ST4Fourv8h_POST:
4355 case AArch64::ST4i16_POST:
4356 case AArch64::ST4i32_POST:
4357 case AArch64::ST4i64_POST:
4358 case AArch64::ST4i8_POST:
4359 case AArch64::STGPostIndex:
4360 case AArch64::STGPpost:
4361 case AArch64::STPDpost:
4362 case AArch64::STPQpost:
4363 case AArch64::STPSpost:
4364 case AArch64::STPWpost:
4365 case AArch64::STPXpost:
4366 case AArch64::STRBBpost:
4367 case AArch64::STRBpost:
4368 case AArch64::STRDpost:
4369 case AArch64::STRHHpost:
4370 case AArch64::STRHpost:
4371 case AArch64::STRQpost:
4372 case AArch64::STRSpost:
4373 case AArch64::STRWpost:
4374 case AArch64::STRXpost:
4375 case AArch64::STZ2GPostIndex:
4376 case AArch64::STZGPostIndex:
4377 return true;
4378 }
4379}
4380
4381bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4382 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4383 bool &OffsetIsScalable, TypeSize &Width,
4384 const TargetRegisterInfo *TRI) const {
4385 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4386 // Handle only loads/stores with base register followed by immediate offset.
4387 if (LdSt.getNumExplicitOperands() == 3) {
4388 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4389 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4390 !LdSt.getOperand(2).isImm())
4391 return false;
4392 } else if (LdSt.getNumExplicitOperands() == 4) {
4393 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4394 if (!LdSt.getOperand(1).isReg() ||
4395 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4396 !LdSt.getOperand(3).isImm())
4397 return false;
4398 } else
4399 return false;
4400
4401 // Get the scaling factor for the instruction and set the width for the
4402 // instruction.
4403 TypeSize Scale(0U, false);
4404 int64_t Dummy1, Dummy2;
4405
4406 // If this returns false, then it's an instruction we don't want to handle.
4407 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4408 return false;
4409
4410 // Compute the offset. The offset is the immediate operand multiplied by the
4411 // scaling factor; unscaled instructions have a scaling factor of 1.
4412 // Post-index instructions are a special case and have an offset of 0.
4413 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4414 BaseOp = &LdSt.getOperand(2);
4415 Offset = 0;
4416 } else if (LdSt.getNumExplicitOperands() == 3) {
4417 BaseOp = &LdSt.getOperand(1);
4418 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4419 } else {
4420 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4421 BaseOp = &LdSt.getOperand(2);
4422 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4423 }
4424 OffsetIsScalable = Scale.isScalable();
4425
4426 return BaseOp->isReg() || BaseOp->isFI();
4427}
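// Minimal usage sketch (an assumption, not code from this file); TII, TRI and
// MI stand for a suitable AArch64InstrInfo, TargetRegisterInfo and memory
// MachineInstr already in scope.
//   const MachineOperand *BaseOp;
//   int64_t Offset;
//   bool OffsetIsScalable;
//   TypeSize Width = TypeSize::getFixed(0);
//   if (TII->getMemOperandWithOffsetWidth(MI, BaseOp, Offset, OffsetIsScalable,
//                                         Width, TRI)) {
//     // For `ldr x1, [x0, #8]` (LDRXui with immediate 1) this reports the base
//     // operand x0, a byte offset of 8 and an access width of 8 bytes.
//   }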
4428
4431 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4432 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4433 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4434 return OfsOp;
4435}
4436
4437bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4438 TypeSize &Width, int64_t &MinOffset,
4439 int64_t &MaxOffset) {
4440 switch (Opcode) {
4441 // Not a memory operation or something we want to handle.
4442 default:
4443 Scale = TypeSize::getFixed(0);
4444 Width = TypeSize::getFixed(0);
4445 MinOffset = MaxOffset = 0;
4446 return false;
4447 // LDR / STR
4448 case AArch64::LDRQui:
4449 case AArch64::STRQui:
4450 Scale = TypeSize::getFixed(16);
4451 Width = TypeSize::getFixed(16);
4452 MinOffset = 0;
4453 MaxOffset = 4095;
4454 break;
4455 case AArch64::LDRXui:
4456 case AArch64::LDRDui:
4457 case AArch64::STRXui:
4458 case AArch64::STRDui:
4459 case AArch64::PRFMui:
4460 Scale = TypeSize::getFixed(8);
4461 Width = TypeSize::getFixed(8);
4462 MinOffset = 0;
4463 MaxOffset = 4095;
4464 break;
4465 case AArch64::LDRWui:
4466 case AArch64::LDRSui:
4467 case AArch64::LDRSWui:
4468 case AArch64::STRWui:
4469 case AArch64::STRSui:
4470 Scale = TypeSize::getFixed(4);
4471 Width = TypeSize::getFixed(4);
4472 MinOffset = 0;
4473 MaxOffset = 4095;
4474 break;
4475 case AArch64::LDRHui:
4476 case AArch64::LDRHHui:
4477 case AArch64::LDRSHWui:
4478 case AArch64::LDRSHXui:
4479 case AArch64::STRHui:
4480 case AArch64::STRHHui:
4481 Scale = TypeSize::getFixed(2);
4482 Width = TypeSize::getFixed(2);
4483 MinOffset = 0;
4484 MaxOffset = 4095;
4485 break;
4486 case AArch64::LDRBui:
4487 case AArch64::LDRBBui:
4488 case AArch64::LDRSBWui:
4489 case AArch64::LDRSBXui:
4490 case AArch64::STRBui:
4491 case AArch64::STRBBui:
4492 Scale = TypeSize::getFixed(1);
4493 Width = TypeSize::getFixed(1);
4494 MinOffset = 0;
4495 MaxOffset = 4095;
4496 break;
4497 // post/pre inc
4498 case AArch64::STRQpre:
4499 case AArch64::LDRQpost:
4500 Scale = TypeSize::getFixed(1);
4501 Width = TypeSize::getFixed(16);
4502 MinOffset = -256;
4503 MaxOffset = 255;
4504 break;
4505 case AArch64::LDRDpost:
4506 case AArch64::LDRDpre:
4507 case AArch64::LDRXpost:
4508 case AArch64::LDRXpre:
4509 case AArch64::STRDpost:
4510 case AArch64::STRDpre:
4511 case AArch64::STRXpost:
4512 case AArch64::STRXpre:
4513 Scale = TypeSize::getFixed(1);
4514 Width = TypeSize::getFixed(8);
4515 MinOffset = -256;
4516 MaxOffset = 255;
4517 break;
4518 case AArch64::STRWpost:
4519 case AArch64::STRWpre:
4520 case AArch64::LDRWpost:
4521 case AArch64::LDRWpre:
4522 case AArch64::STRSpost:
4523 case AArch64::STRSpre:
4524 case AArch64::LDRSpost:
4525 case AArch64::LDRSpre:
4526 Scale = TypeSize::getFixed(1);
4527 Width = TypeSize::getFixed(4);
4528 MinOffset = -256;
4529 MaxOffset = 255;
4530 break;
4531 case AArch64::LDRHpost:
4532 case AArch64::LDRHpre:
4533 case AArch64::STRHpost:
4534 case AArch64::STRHpre:
4535 case AArch64::LDRHHpost:
4536 case AArch64::LDRHHpre:
4537 case AArch64::STRHHpost:
4538 case AArch64::STRHHpre:
4539 Scale = TypeSize::getFixed(1);
4540 Width = TypeSize::getFixed(2);
4541 MinOffset = -256;
4542 MaxOffset = 255;
4543 break;
4544 case AArch64::LDRBpost:
4545 case AArch64::LDRBpre:
4546 case AArch64::STRBpost:
4547 case AArch64::STRBpre:
4548 case AArch64::LDRBBpost:
4549 case AArch64::LDRBBpre:
4550 case AArch64::STRBBpost:
4551 case AArch64::STRBBpre:
4552 Scale = TypeSize::getFixed(1);
4553 Width = TypeSize::getFixed(1);
4554 MinOffset = -256;
4555 MaxOffset = 255;
4556 break;
4557 // Unscaled
4558 case AArch64::LDURQi:
4559 case AArch64::STURQi:
4560 Scale = TypeSize::getFixed(1);
4561 Width = TypeSize::getFixed(16);
4562 MinOffset = -256;
4563 MaxOffset = 255;
4564 break;
4565 case AArch64::LDURXi:
4566 case AArch64::LDURDi:
4567 case AArch64::LDAPURXi:
4568 case AArch64::STURXi:
4569 case AArch64::STURDi:
4570 case AArch64::STLURXi:
4571 case AArch64::PRFUMi:
4572 Scale = TypeSize::getFixed(1);
4573 Width = TypeSize::getFixed(8);
4574 MinOffset = -256;
4575 MaxOffset = 255;
4576 break;
4577 case AArch64::LDURWi:
4578 case AArch64::LDURSi:
4579 case AArch64::LDURSWi:
4580 case AArch64::LDAPURi:
4581 case AArch64::LDAPURSWi:
4582 case AArch64::STURWi:
4583 case AArch64::STURSi:
4584 case AArch64::STLURWi:
4585 Scale = TypeSize::getFixed(1);
4586 Width = TypeSize::getFixed(4);
4587 MinOffset = -256;
4588 MaxOffset = 255;
4589 break;
4590 case AArch64::LDURHi:
4591 case AArch64::LDURHHi:
4592 case AArch64::LDURSHXi:
4593 case AArch64::LDURSHWi:
4594 case AArch64::LDAPURHi:
4595 case AArch64::LDAPURSHWi:
4596 case AArch64::LDAPURSHXi:
4597 case AArch64::STURHi:
4598 case AArch64::STURHHi:
4599 case AArch64::STLURHi:
4600 Scale = TypeSize::getFixed(1);
4601 Width = TypeSize::getFixed(2);
4602 MinOffset = -256;
4603 MaxOffset = 255;
4604 break;
4605 case AArch64::LDURBi:
4606 case AArch64::LDURBBi:
4607 case AArch64::LDURSBXi:
4608 case AArch64::LDURSBWi:
4609 case AArch64::LDAPURBi:
4610 case AArch64::LDAPURSBWi:
4611 case AArch64::LDAPURSBXi:
4612 case AArch64::STURBi:
4613 case AArch64::STURBBi:
4614 case AArch64::STLURBi:
4615 Scale = TypeSize::getFixed(1);
4616 Width = TypeSize::getFixed(1);
4617 MinOffset = -256;
4618 MaxOffset = 255;
4619 break;
4620 // LDP / STP (including pre/post inc)
4621 case AArch64::LDPQi:
4622 case AArch64::LDNPQi:
4623 case AArch64::STPQi:
4624 case AArch64::STNPQi:
4625 case AArch64::LDPQpost:
4626 case AArch64::LDPQpre:
4627 case AArch64::STPQpost:
4628 case AArch64::STPQpre:
4629 Scale = TypeSize::getFixed(16);
4630 Width = TypeSize::getFixed(16 * 2);
4631 MinOffset = -64;
4632 MaxOffset = 63;
4633 break;
4634 case AArch64::LDPXi:
4635 case AArch64::LDPDi:
4636 case AArch64::LDNPXi:
4637 case AArch64::LDNPDi:
4638 case AArch64::STPXi:
4639 case AArch64::STPDi:
4640 case AArch64::STNPXi:
4641 case AArch64::STNPDi:
4642 case AArch64::LDPDpost:
4643 case AArch64::LDPDpre:
4644 case AArch64::LDPXpost:
4645 case AArch64::LDPXpre:
4646 case AArch64::STPDpost:
4647 case AArch64::STPDpre:
4648 case AArch64::STPXpost:
4649 case AArch64::STPXpre:
4650 Scale = TypeSize::getFixed(8);
4651 Width = TypeSize::getFixed(8 * 2);
4652 MinOffset = -64;
4653 MaxOffset = 63;
4654 break;
4655 case AArch64::LDPWi:
4656 case AArch64::LDPSi:
4657 case AArch64::LDNPWi:
4658 case AArch64::LDNPSi:
4659 case AArch64::STPWi:
4660 case AArch64::STPSi:
4661 case AArch64::STNPWi:
4662 case AArch64::STNPSi:
4663 case AArch64::LDPSpost:
4664 case AArch64::LDPSpre:
4665 case AArch64::LDPWpost:
4666 case AArch64::LDPWpre:
4667 case AArch64::STPSpost:
4668 case AArch64::STPSpre:
4669 case AArch64::STPWpost:
4670 case AArch64::STPWpre:
4671 Scale = TypeSize::getFixed(4);
4672 Width = TypeSize::getFixed(4 * 2);
4673 MinOffset = -64;
4674 MaxOffset = 63;
4675 break;
4676 case AArch64::StoreSwiftAsyncContext:
4677 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4678 Scale = TypeSize::getFixed(1);
4679 Width = TypeSize::getFixed(8);
4680 MinOffset = 0;
4681 MaxOffset = 4095;
4682 break;
4683 case AArch64::ADDG:
4684 Scale = TypeSize::getFixed(16);
4685 Width = TypeSize::getFixed(0);
4686 MinOffset = 0;
4687 MaxOffset = 63;
4688 break;
4689 case AArch64::TAGPstack:
4690 Scale = TypeSize::getFixed(16);
4691 Width = TypeSize::getFixed(0);
4692 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4693 // of 63 (not 64!).
4694 MinOffset = -63;
4695 MaxOffset = 63;
4696 break;
4697 case AArch64::LDG:
4698 case AArch64::STGi:
4699 case AArch64::STGPreIndex:
4700 case AArch64::STGPostIndex:
4701 case AArch64::STZGi:
4702 case AArch64::STZGPreIndex:
4703 case AArch64::STZGPostIndex:
4704 Scale = TypeSize::getFixed(16);
4705 Width = TypeSize::getFixed(16);
4706 MinOffset = -256;
4707 MaxOffset = 255;
4708 break;
4709 // SVE
4710 case AArch64::STR_ZZZZXI:
4711 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4712 case AArch64::LDR_ZZZZXI:
4713 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4714 Scale = TypeSize::getScalable(16);
4715 Width = TypeSize::getScalable(16 * 4);
4716 MinOffset = -256;
4717 MaxOffset = 252;
4718 break;
4719 case AArch64::STR_ZZZXI:
4720 case AArch64::LDR_ZZZXI:
4721 Scale = TypeSize::getScalable(16);
4722 Width = TypeSize::getScalable(16 * 3);
4723 MinOffset = -256;
4724 MaxOffset = 253;
4725 break;
4726 case AArch64::STR_ZZXI:
4727 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4728 case AArch64::LDR_ZZXI:
4729 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4730 Scale = TypeSize::getScalable(16);
4731 Width = TypeSize::getScalable(16 * 2);
4732 MinOffset = -256;
4733 MaxOffset = 254;
4734 break;
4735 case AArch64::LDR_PXI:
4736 case AArch64::STR_PXI:
4737 Scale = TypeSize::getScalable(2);
4738 Width = TypeSize::getScalable(2);
4739 MinOffset = -256;
4740 MaxOffset = 255;
4741 break;
4742 case AArch64::LDR_PPXI:
4743 case AArch64::STR_PPXI:
4744 Scale = TypeSize::getScalable(2);
4745 Width = TypeSize::getScalable(2 * 2);
4746 MinOffset = -256;
4747 MaxOffset = 254;
4748 break;
4749 case AArch64::LDR_ZXI:
4750 case AArch64::STR_ZXI:
4751 Scale = TypeSize::getScalable(16);
4752 Width = TypeSize::getScalable(16);
4753 MinOffset = -256;
4754 MaxOffset = 255;
4755 break;
4756 case AArch64::LD1B_IMM:
4757 case AArch64::LD1H_IMM:
4758 case AArch64::LD1W_IMM:
4759 case AArch64::LD1D_IMM:
4760 case AArch64::LDNT1B_ZRI:
4761 case AArch64::LDNT1H_ZRI:
4762 case AArch64::LDNT1W_ZRI:
4763 case AArch64::LDNT1D_ZRI:
4764 case AArch64::ST1B_IMM:
4765 case AArch64::ST1H_IMM:
4766 case AArch64::ST1W_IMM:
4767 case AArch64::ST1D_IMM:
4768 case AArch64::STNT1B_ZRI:
4769 case AArch64::STNT1H_ZRI:
4770 case AArch64::STNT1W_ZRI:
4771 case AArch64::STNT1D_ZRI:
4772 case AArch64::LDNF1B_IMM:
4773 case AArch64::LDNF1H_IMM:
4774 case AArch64::LDNF1W_IMM:
4775 case AArch64::LDNF1D_IMM:
4776 // A full vector's worth of data
4777 // Width = mbytes * elements
4778 Scale = TypeSize::getScalable(16);
4779 Width = TypeSize::getScalable(16);
4780 MinOffset = -8;
4781 MaxOffset = 7;
4782 break;
4783 case AArch64::LD2B_IMM:
4784 case AArch64::LD2H_IMM:
4785 case AArch64::LD2W_IMM:
4786 case AArch64::LD2D_IMM:
4787 case AArch64::ST2B_IMM:
4788 case AArch64::ST2H_IMM:
4789 case AArch64::ST2W_IMM:
4790 case AArch64::ST2D_IMM:
4791 Scale = TypeSize::getScalable(32);
4792 Width = TypeSize::getScalable(16 * 2);
4793 MinOffset = -8;
4794 MaxOffset = 7;
4795 break;
4796 case AArch64::LD3B_IMM:
4797 case AArch64::LD3H_IMM:
4798 case AArch64::LD3W_IMM:
4799 case AArch64::LD3D_IMM:
4800 case AArch64::ST3B_IMM:
4801 case AArch64::ST3H_IMM:
4802 case AArch64::ST3W_IMM:
4803 case AArch64::ST3D_IMM:
4804 Scale = TypeSize::getScalable(48);
4805 Width = TypeSize::getScalable(16 * 3);
4806 MinOffset = -8;
4807 MaxOffset = 7;
4808 break;
4809 case AArch64::LD4B_IMM:
4810 case AArch64::LD4H_IMM:
4811 case AArch64::LD4W_IMM:
4812 case AArch64::LD4D_IMM:
4813 case AArch64::ST4B_IMM:
4814 case AArch64::ST4H_IMM:
4815 case AArch64::ST4W_IMM:
4816 case AArch64::ST4D_IMM:
4817 Scale = TypeSize::getScalable(64);
4818 Width = TypeSize::getScalable(16 * 4);
4819 MinOffset = -8;
4820 MaxOffset = 7;
4821 break;
4822 case AArch64::LD1B_H_IMM:
4823 case AArch64::LD1SB_H_IMM:
4824 case AArch64::LD1H_S_IMM:
4825 case AArch64::LD1SH_S_IMM:
4826 case AArch64::LD1W_D_IMM:
4827 case AArch64::LD1SW_D_IMM:
4828 case AArch64::ST1B_H_IMM:
4829 case AArch64::ST1H_S_IMM:
4830 case AArch64::ST1W_D_IMM:
4831 case AArch64::LDNF1B_H_IMM:
4832 case AArch64::LDNF1SB_H_IMM:
4833 case AArch64::LDNF1H_S_IMM:
4834 case AArch64::LDNF1SH_S_IMM:
4835 case AArch64::LDNF1W_D_IMM:
4836 case AArch64::LDNF1SW_D_IMM:
4837 // A half vector's worth of data
4838 // Width = mbytes * elements
4839 Scale = TypeSize::getScalable(8);
4840 Width = TypeSize::getScalable(8);
4841 MinOffset = -8;
4842 MaxOffset = 7;
4843 break;
4844 case AArch64::LD1B_S_IMM:
4845 case AArch64::LD1SB_S_IMM:
4846 case AArch64::LD1H_D_IMM:
4847 case AArch64::LD1SH_D_IMM:
4848 case AArch64::ST1B_S_IMM:
4849 case AArch64::ST1H_D_IMM:
4850 case AArch64::LDNF1B_S_IMM:
4851 case AArch64::LDNF1SB_S_IMM:
4852 case AArch64::LDNF1H_D_IMM:
4853 case AArch64::LDNF1SH_D_IMM:
4854 // A quarter vector's worth of data
4855 // Width = mbytes * elements
4856 Scale = TypeSize::getScalable(4);
4857 Width = TypeSize::getScalable(4);
4858 MinOffset = -8;
4859 MaxOffset = 7;
4860 break;
4861 case AArch64::LD1B_D_IMM:
4862 case AArch64::LD1SB_D_IMM:
4863 case AArch64::ST1B_D_IMM:
4864 case AArch64::LDNF1B_D_IMM:
4865 case AArch64::LDNF1SB_D_IMM:
4866 // An eighth vector's worth of data
4867 // Width = mbytes * elements
4868 Scale = TypeSize::getScalable(2);
4869 Width = TypeSize::getScalable(2);
4870 MinOffset = -8;
4871 MaxOffset = 7;
4872 break;
4873 case AArch64::ST2Gi:
4874 case AArch64::ST2GPreIndex:
4875 case AArch64::ST2GPostIndex:
4876 case AArch64::STZ2Gi:
4877 case AArch64::STZ2GPreIndex:
4878 case AArch64::STZ2GPostIndex:
4879 Scale = TypeSize::getFixed(16);
4880 Width = TypeSize::getFixed(32);
4881 MinOffset = -256;
4882 MaxOffset = 255;
4883 break;
4884 case AArch64::STGPi:
4885 case AArch64::STGPpost:
4886 case AArch64::STGPpre:
4887 Scale = TypeSize::getFixed(16);
4888 Width = TypeSize::getFixed(16);
4889 MinOffset = -64;
4890 MaxOffset = 63;
4891 break;
4892 case AArch64::LD1RB_IMM:
4893 case AArch64::LD1RB_H_IMM:
4894 case AArch64::LD1RB_S_IMM:
4895 case AArch64::LD1RB_D_IMM:
4896 case AArch64::LD1RSB_H_IMM:
4897 case AArch64::LD1RSB_S_IMM:
4898 case AArch64::LD1RSB_D_IMM:
4899 Scale = TypeSize::getFixed(1);
4900 Width = TypeSize::getFixed(1);
4901 MinOffset = 0;
4902 MaxOffset = 63;
4903 break;
4904 case AArch64::LD1RH_IMM:
4905 case AArch64::LD1RH_S_IMM:
4906 case AArch64::LD1RH_D_IMM:
4907 case AArch64::LD1RSH_S_IMM:
4908 case AArch64::LD1RSH_D_IMM:
4909 Scale = TypeSize::getFixed(2);
4910 Width = TypeSize::getFixed(2);
4911 MinOffset = 0;
4912 MaxOffset = 63;
4913 break;
4914 case AArch64::LD1RW_IMM:
4915 case AArch64::LD1RW_D_IMM:
4916 case AArch64::LD1RSW_IMM:
4917 Scale = TypeSize::getFixed(4);
4918 Width = TypeSize::getFixed(4);
4919 MinOffset = 0;
4920 MaxOffset = 63;
4921 break;
4922 case AArch64::LD1RD_IMM:
4923 Scale = TypeSize::getFixed(8);
4924 Width = TypeSize::getFixed(8);
4925 MinOffset = 0;
4926 MaxOffset = 63;
4927 break;
4928 }
4929
4930 return true;
4931}
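// Minimal sketch (assumed caller-side code, not from this file) of how the
// table above is typically consumed: a byte offset is representable for an
// opcode when it is a multiple of Scale and the scaled value lies within
// [MinOffset, MaxOffset]. `Opc` and `ByteOffset` are placeholders.
//   TypeSize Scale(0U, false), Width(0U, false);
//   int64_t MinOff, MaxOff;
//   bool Legal =
//       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOff, MaxOff) &&
//       ByteOffset % Scale.getKnownMinValue() == 0 &&
//       ByteOffset / Scale.getKnownMinValue() >= MinOff &&
//       ByteOffset / Scale.getKnownMinValue() <= MaxOff;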
4932
4933// Scaling factor for unscaled load or store.
4934int AArch64InstrInfo::getMemScale(unsigned Opc) {
4935 switch (Opc) {
4936 default:
4937 llvm_unreachable("Opcode has unknown scale!");
4938 case AArch64::LDRBBui:
4939 case AArch64::LDURBBi:
4940 case AArch64::LDRSBWui:
4941 case AArch64::LDURSBWi:
4942 case AArch64::STRBBui:
4943 case AArch64::STURBBi:
4944 return 1;
4945 case AArch64::LDRHHui:
4946 case AArch64::LDURHHi:
4947 case AArch64::LDRSHWui:
4948 case AArch64::LDURSHWi:
4949 case AArch64::STRHHui:
4950 case AArch64::STURHHi:
4951 return 2;
4952 case AArch64::LDRSui:
4953 case AArch64::LDURSi:
4954 case AArch64::LDRSpre:
4955 case AArch64::LDRSWui:
4956 case AArch64::LDURSWi:
4957 case AArch64::LDRSWpre:
4958 case AArch64::LDRWpre:
4959 case AArch64::LDRWui:
4960 case AArch64::LDURWi:
4961 case AArch64::STRSui:
4962 case AArch64::STURSi:
4963 case AArch64::STRSpre:
4964 case AArch64::STRWui:
4965 case AArch64::STURWi:
4966 case AArch64::STRWpre:
4967 case AArch64::LDPSi:
4968 case AArch64::LDPSWi:
4969 case AArch64::LDPWi:
4970 case AArch64::STPSi:
4971 case AArch64::STPWi:
4972 return 4;
4973 case AArch64::LDRDui:
4974 case AArch64::LDURDi:
4975 case AArch64::LDRDpre:
4976 case AArch64::LDRXui:
4977 case AArch64::LDURXi:
4978 case AArch64::LDRXpre:
4979 case AArch64::STRDui:
4980 case AArch64::STURDi:
4981 case AArch64::STRDpre:
4982 case AArch64::STRXui:
4983 case AArch64::STURXi:
4984 case AArch64::STRXpre:
4985 case AArch64::LDPDi:
4986 case AArch64::LDPXi:
4987 case AArch64::STPDi:
4988 case AArch64::STPXi:
4989 return 8;
4990 case AArch64::LDRQui:
4991 case AArch64::LDURQi:
4992 case AArch64::STRQui:
4993 case AArch64::STURQi:
4994 case AArch64::STRQpre:
4995 case AArch64::LDPQi:
4996 case AArch64::LDRQpre:
4997 case AArch64::STPQi:
4998 case AArch64::STGi:
4999 case AArch64::STZGi:
5000 case AArch64::ST2Gi:
5001 case AArch64::STZ2Gi:
5002 case AArch64::STGPi:
5003 return 16;
5004 }
5005}
5006
5007bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
5008 switch (MI.getOpcode()) {
5009 default:
5010 return false;
5011 case AArch64::LDRWpre:
5012 case AArch64::LDRXpre:
5013 case AArch64::LDRSWpre:
5014 case AArch64::LDRSpre:
5015 case AArch64::LDRDpre:
5016 case AArch64::LDRQpre:
5017 return true;
5018 }
5019}
5020
5021bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
5022 switch (MI.getOpcode()) {
5023 default:
5024 return false;
5025 case AArch64::STRWpre:
5026 case AArch64::STRXpre:
5027 case AArch64::STRSpre:
5028 case AArch64::STRDpre:
5029 case AArch64::STRQpre:
5030 return true;
5031 }
5032}
5033
5034bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
5035 return isPreLd(MI) || isPreSt(MI);
5036}
5037
5038bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
5039 switch (MI.getOpcode()) {
5040 default:
5041 return false;
5042 case AArch64::LDPSi:
5043 case AArch64::LDPSWi:
5044 case AArch64::LDPDi:
5045 case AArch64::LDPQi:
5046 case AArch64::LDPWi:
5047 case AArch64::LDPXi:
5048 case AArch64::STPSi:
5049 case AArch64::STPDi:
5050 case AArch64::STPQi:
5051 case AArch64::STPWi:
5052 case AArch64::STPXi:
5053 case AArch64::STGPi:
5054 return true;
5055 }
5056}
5057
5058const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
5059 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5060 unsigned Idx =
5061 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
5062 : 1;
5063 return MI.getOperand(Idx);
5064}
5065
5066const MachineOperand &
5067AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
5068 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
5069 unsigned Idx =
5070 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
5071 : 2;
5072 return MI.getOperand(Idx);
5073}
5074
5075const MachineOperand &
5076AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
5077 switch (MI.getOpcode()) {
5078 default:
5079 llvm_unreachable("Unexpected opcode");
5080 case AArch64::LDRBroX:
5081 case AArch64::LDRBBroX:
5082 case AArch64::LDRSBXroX:
5083 case AArch64::LDRSBWroX:
5084 case AArch64::LDRHroX:
5085 case AArch64::LDRHHroX:
5086 case AArch64::LDRSHXroX:
5087 case AArch64::LDRSHWroX:
5088 case AArch64::LDRWroX:
5089 case AArch64::LDRSroX:
5090 case AArch64::LDRSWroX:
5091 case AArch64::LDRDroX:
5092 case AArch64::LDRXroX:
5093 case AArch64::LDRQroX:
5094 return MI.getOperand(4);
5095 }
5096}
5097
5098static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
5099 Register Reg) {
5100 if (MI.getParent() == nullptr)
5101 return nullptr;
5102 const MachineFunction *MF = MI.getParent()->getParent();
5103 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5104}
5105
5106bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
5107 auto IsHFPR = [&](const MachineOperand &Op) {
5108 if (!Op.isReg())
5109 return false;
5110 auto Reg = Op.getReg();
5111 if (Reg.isPhysical())
5112 return AArch64::FPR16RegClass.contains(Reg);
5113 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5114 return TRC == &AArch64::FPR16RegClass ||
5115 TRC == &AArch64::FPR16_loRegClass;
5116 };
5117 return llvm::any_of(MI.operands(), IsHFPR);
5118}
5119
5120bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
5121 auto IsQFPR = [&](const MachineOperand &Op) {
5122 if (!Op.isReg())
5123 return false;
5124 auto Reg = Op.getReg();
5125 if (Reg.isPhysical())
5126 return AArch64::FPR128RegClass.contains(Reg);
5127 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5128 return TRC == &AArch64::FPR128RegClass ||
5129 TRC == &AArch64::FPR128_loRegClass;
5130 };
5131 return llvm::any_of(MI.operands(), IsQFPR);
5132}
5133
5135 switch (MI.getOpcode()) {
5136 case AArch64::BRK:
5137 case AArch64::HLT:
5138 case AArch64::PACIASP:
5139 case AArch64::PACIBSP:
5140 // Implicit BTI behavior.
5141 return true;
5142 case AArch64::PAUTH_PROLOGUE:
5143 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5144 return true;
5145 case AArch64::HINT: {
5146 unsigned Imm = MI.getOperand(0).getImm();
5147 // Explicit BTI instruction.
5148 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5149 return true;
5150 // PACI(A|B)SP instructions.
5151 if (Imm == 25 || Imm == 27)
5152 return true;
5153 return false;
5154 }
5155 default:
5156 return false;
5157 }
5158}
5159
5160bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
5161 if (Reg == 0)
5162 return false;
5163 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5164 return AArch64::FPR128RegClass.contains(Reg) ||
5165 AArch64::FPR64RegClass.contains(Reg) ||
5166 AArch64::FPR32RegClass.contains(Reg) ||
5167 AArch64::FPR16RegClass.contains(Reg) ||
5168 AArch64::FPR8RegClass.contains(Reg);
5169}
5170
5171bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
5172 auto IsFPR = [&](const MachineOperand &Op) {
5173 if (!Op.isReg())
5174 return false;
5175 auto Reg = Op.getReg();
5176 if (Reg.isPhysical())
5177 return isFpOrNEON(Reg);
5178
5179 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5180 return TRC == &AArch64::FPR128RegClass ||
5181 TRC == &AArch64::FPR128_loRegClass ||
5182 TRC == &AArch64::FPR64RegClass ||
5183 TRC == &AArch64::FPR64_loRegClass ||
5184 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5185 TRC == &AArch64::FPR8RegClass;
5186 };
5187 return llvm::any_of(MI.operands(), IsFPR);
5188}
5189
5190// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5191// scaled.
5192static bool scaleOffset(unsigned Opc, int64_t &Offset) {
5193 int Scale = AArch64InstrInfo::getMemScale(Opc);
5194
5195 // If the byte-offset isn't a multiple of the stride, we can't scale this
5196 // offset.
5197 if (Offset % Scale != 0)
5198 return false;
5199
5200 // Convert the byte-offset used by unscaled into an "element" offset used
5201 // by the scaled pair load/store instructions.
5202 Offset /= Scale;
5203 return true;
5204}
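// Worked example (illustrative): LDURXi has a stride of 8, so a byte offset of
// 24 scales to an element offset of 3, while a byte offset of 20 is not a
// multiple of the stride and the candidate is rejected.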
5205
5206static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5207 if (FirstOpc == SecondOpc)
5208 return true;
5209 // We can also pair sign-ext and zero-ext instructions.
5210 switch (FirstOpc) {
5211 default:
5212 return false;
5213 case AArch64::STRSui:
5214 case AArch64::STURSi:
5215 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5216 case AArch64::STRDui:
5217 case AArch64::STURDi:
5218 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5219 case AArch64::STRQui:
5220 case AArch64::STURQi:
5221 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5222 case AArch64::STRWui:
5223 case AArch64::STURWi:
5224 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5225 case AArch64::STRXui:
5226 case AArch64::STURXi:
5227 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5228 case AArch64::LDRSui:
5229 case AArch64::LDURSi:
5230 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5231 case AArch64::LDRDui:
5232 case AArch64::LDURDi:
5233 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5234 case AArch64::LDRQui:
5235 case AArch64::LDURQi:
5236 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5237 case AArch64::LDRWui:
5238 case AArch64::LDURWi:
5239 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5240 case AArch64::LDRSWui:
5241 case AArch64::LDURSWi:
5242 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5243 case AArch64::LDRXui:
5244 case AArch64::LDURXi:
5245 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5246 }
5247 // These instructions can't be paired based on their opcodes.
5248 return false;
5249}
5250
5251static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5252 int64_t Offset1, unsigned Opcode1, int FI2,
5253 int64_t Offset2, unsigned Opcode2) {
5254 // Accesses through fixed stack object frame indices may access a different
5255 // fixed stack slot. Check that the object offsets + offsets match.
5256 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5257 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5258 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5259 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5260 // Convert to scaled object offsets.
5261 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5262 if (ObjectOffset1 % Scale1 != 0)
5263 return false;
5264 ObjectOffset1 /= Scale1;
5265 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5266 if (ObjectOffset2 % Scale2 != 0)
5267 return false;
5268 ObjectOffset2 /= Scale2;
5269 ObjectOffset1 += Offset1;
5270 ObjectOffset2 += Offset2;
5271 return ObjectOffset1 + 1 == ObjectOffset2;
5272 }
5273
5274 return FI1 == FI2;
5275}
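// Worked example (illustrative): two 8-byte fixed stack objects at object
// offsets 16 and 24, each accessed with an instruction offset of 0, give
// scaled offsets 2 and 3; they are adjacent, so the accesses may be clustered.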
5276
5277/// Detect opportunities for ldp/stp formation.
5278///
5279/// Only called for LdSt for which getMemOperandWithOffset returns true.
5280bool AArch64InstrInfo::shouldClusterMemOps(
5281 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5282 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5283 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5284 unsigned NumBytes) const {
5285 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5286 const MachineOperand &BaseOp1 = *BaseOps1.front();
5287 const MachineOperand &BaseOp2 = *BaseOps2.front();
5288 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5289 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
5290 if (BaseOp1.getType() != BaseOp2.getType())
5291 return false;
5292
5293 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5294 "Only base registers and frame indices are supported.");
5295
5296 // Check for both base regs and base FI.
5297 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5298 return false;
5299
5300 // Only cluster up to a single pair.
5301 if (ClusterSize > 2)
5302 return false;
5303
5304 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5305 return false;
5306
5307 // Can we pair these instructions based on their opcodes?
5308 unsigned FirstOpc = FirstLdSt.getOpcode();
5309 unsigned SecondOpc = SecondLdSt.getOpcode();
5310 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5311 return false;
5312
5313 // Can't merge volatiles or load/stores that have a hint to avoid pair
5314 // formation, for example.
5315 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5316 !isCandidateToMergeOrPair(SecondLdSt))
5317 return false;
5318
5319 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
5320 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5321 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5322 return false;
5323
5324 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5325 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5326 return false;
5327
5328 // Pairwise instructions have a 7-bit signed offset field.
5329 if (Offset1 > 63 || Offset1 < -64)
5330 return false;
5331
5332 // The caller should already have ordered First/SecondLdSt by offset.
5333 // Note: except for non-equal frame index bases
5334 if (BaseOp1.isFI()) {
5335 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5336 "Caller should have ordered offsets.");
5337
5338 const MachineFrameInfo &MFI =
5339 FirstLdSt.getParent()->getParent()->getFrameInfo();
5340 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5341 BaseOp2.getIndex(), Offset2, SecondOpc);
5342 }
5343
5344 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5345
5346 return Offset1 + 1 == Offset2;
5347}
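// Worked example (illustrative): `ldr x1, [x0, #8]` and `ldr x2, [x0, #16]`
// (LDRXui with immediates 1 and 2) share a base register, pass the opcode
// check, and satisfy Offset1 + 1 == Offset2, so the scheduler may cluster them
// into an eventual `ldp x1, x2, [x0, #8]`.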
5348
5349static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5350 MCRegister Reg, unsigned SubIdx,
5351 RegState State,
5352 const TargetRegisterInfo *TRI) {
5353 if (!SubIdx)
5354 return MIB.addReg(Reg, State);
5355
5356 if (Reg.isPhysical())
5357 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5358 return MIB.addReg(Reg, State, SubIdx);
5359}
5360
5361static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5362 unsigned NumRegs) {
5363 // We really want the positive remainder mod 32 here; that happens to be
5364 // easily obtainable with a mask.
5365 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5366}
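// Worked example (illustrative): copying a three-register tuple that starts at
// encoding 1 into one that starts at encoding 0 gives (0 - 1) & 0x1f == 31,
// which is not below 3, so a forward sub-register copy is safe; copying
// encodings 0..2 into 2..4 gives (2 - 0) & 0x1f == 2 < 3, so the tuple copy
// below iterates in reverse to avoid clobbering not-yet-read source registers.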
5367
5368void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5369 MachineBasicBlock::iterator I,
5370 const DebugLoc &DL, MCRegister DestReg,
5371 MCRegister SrcReg, bool KillSrc,
5372 unsigned Opcode,
5373 ArrayRef<unsigned> Indices) const {
5374 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5375 const TargetRegisterInfo *TRI = &getRegisterInfo();
5376 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5377 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5378 unsigned NumRegs = Indices.size();
5379
5380 int SubReg = 0, End = NumRegs, Incr = 1;
5381 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5382 SubReg = NumRegs - 1;
5383 End = -1;
5384 Incr = -1;
5385 }
5386
5387 for (; SubReg != End; SubReg += Incr) {
5388 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5389 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5390 AddSubReg(MIB, SrcReg, Indices[SubReg], {}, TRI);
5391 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5392 }
5393}
5394
5395void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5396 MachineBasicBlock::iterator I,
5397 const DebugLoc &DL, MCRegister DestReg,
5398 MCRegister SrcReg, bool KillSrc,
5399 unsigned Opcode, unsigned ZeroReg,
5400 llvm::ArrayRef<unsigned> Indices) const {
5401 const TargetRegisterInfo *TRI = &getRegisterInfo();
5402 unsigned NumRegs = Indices.size();
5403
5404#ifndef NDEBUG
5405 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5406 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5407 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5408 "GPR reg sequences should not be able to overlap");
5409#endif
5410
5411 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5412 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5413 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5414 MIB.addReg(ZeroReg);
5415 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5416 MIB.addImm(0);
5417 }
5418}
5419
5420/// Returns true if the instruction at I is in a streaming call site region,
5421/// within a single basic block.
5422/// A "call site streaming region" starts after smstart and ends at smstop
5423/// around a call to a streaming function. This walks backward from I.
5424static bool isInStreamingCallSiteRegion(MachineBasicBlock &MBB,
5425 MachineBasicBlock::iterator I) {
5426 MachineFunction &MF = *MBB.getParent();
5427 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5428 if (!AFI->hasStreamingModeChanges())
5429 return false;
5430 // Walk backwards to find smstart/smstop
5431 for (MachineInstr &MI : reverse(make_range(MBB.begin(), I))) {
5432 unsigned Opc = MI.getOpcode();
5433 if (Opc == AArch64::MSRpstatesvcrImm1 || Opc == AArch64::MSRpstatePseudo) {
5434 // Check if this is an SM change (not ZA).
5435 int64_t PState = MI.getOperand(0).getImm();
5436 if (PState == AArch64SVCR::SVCRSM || PState == AArch64SVCR::SVCRSMZA) {
5437 // Operand 1 is 1 for start, 0 for stop
5438 return MI.getOperand(1).getImm() == 1;
5439 }
5440 }
5441 }
5442 return false;
5443}
5444
5445/// Returns true if in a streaming call site region without SME-FA64.
5446static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget,
5447 MachineBasicBlock &MBB,
5448 MachineBasicBlock::iterator I) {
5449 return !Subtarget.hasSMEFA64() && isInStreamingCallSiteRegion(MBB, I);
5450}
5451
5452void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5453 MachineBasicBlock::iterator I,
5454 const DebugLoc &DL, Register DestReg,
5455 Register SrcReg, bool KillSrc,
5456 bool RenamableDest,
5457 bool RenamableSrc) const {
5458 ++NumCopyInstrs;
5459 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5460 AArch64::GPR32spRegClass.contains(SrcReg)) {
5461 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5462 // If either operand is WSP, expand to ADD #0.
5463 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5464 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5465 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5466 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5467 &AArch64::GPR64spRegClass);
5468 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5469 &AArch64::GPR64spRegClass);
5470 // This instruction is reading and writing X registers. This may upset
5471 // the register scavenger and machine verifier, so we need to indicate
5472 // that we are reading an undefined value from SrcRegX, but a proper
5473 // value from SrcReg.
5474 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5475 .addReg(SrcRegX, RegState::Undef)
5476 .addImm(0)
5477 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5478 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5479 ++NumZCRegMoveInstrsGPR;
5480 } else {
5481 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5482 .addReg(SrcReg, getKillRegState(KillSrc))
5483 .addImm(0)
5484 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5485 if (Subtarget.hasZeroCycleRegMoveGPR32())
5486 ++NumZCRegMoveInstrsGPR;
5487 }
5488 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5489 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5490 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5491 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5492 &AArch64::GPR64spRegClass);
5493 assert(DestRegX.isValid() && "Destination super-reg not valid");
5494 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5495 &AArch64::GPR64spRegClass);
5496 assert(SrcRegX.isValid() && "Source super-reg not valid");
5497 // This instruction is reading and writing X registers. This may upset
5498 // the register scavenger and machine verifier, so we need to indicate
5499 // that we are reading an undefined value from SrcRegX, but a proper
5500 // value from SrcReg.
5501 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5502 .addReg(AArch64::XZR)
5503 .addReg(SrcRegX, RegState::Undef)
5504 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5505 ++NumZCRegMoveInstrsGPR;
5506 } else {
5507 // Otherwise, expand to ORR WZR.
5508 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5509 .addReg(AArch64::WZR)
5510 .addReg(SrcReg, getKillRegState(KillSrc));
5511 if (Subtarget.hasZeroCycleRegMoveGPR32())
5512 ++NumZCRegMoveInstrsGPR;
5513 }
5514 return;
5515 }
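// A rough sketch of the GPR32 expansions chosen above (which form is picked
// depends on the subtarget's zero-cycle features):
//   $w0 = COPY $w1   -> $w0 = ORRWrr $wzr, $w1
//   $w0 = COPY $w1   -> $x0 = ORRXrr $xzr, undef $x1, implicit $w1
//                       (when only 64-bit zero-cycle moves exist)
//   $wsp = COPY $w1  -> $wsp = ADDWri $w1, 0, 0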
5516
5517 // GPR32 zeroing
5518 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5519 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5520 !Subtarget.hasZeroCycleZeroingGPR32()) {
5521 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5522 &AArch64::GPR64spRegClass);
5523 assert(DestRegX.isValid() && "Destination super-reg not valid");
5524 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5525 .addImm(0)
5526 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5527 ++NumZCZeroingInstrsGPR;
5528 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5529 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5530 .addImm(0)
5531 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5532 ++NumZCZeroingInstrsGPR;
5533 } else {
5534 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5535 .addReg(AArch64::WZR)
5536 .addReg(AArch64::WZR);
5537 }
5538 return;
5539 }
5540
5541 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5542 AArch64::GPR64spRegClass.contains(SrcReg)) {
5543 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5544 // If either operand is SP, expand to ADD #0.
5545 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5546 .addReg(SrcReg, getKillRegState(KillSrc))
5547 .addImm(0)
5548 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5549 if (Subtarget.hasZeroCycleRegMoveGPR64())
5550 ++NumZCRegMoveInstrsGPR;
5551 } else {
5552 // Otherwise, expand to ORR XZR.
5553 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5554 .addReg(AArch64::XZR)
5555 .addReg(SrcReg, getKillRegState(KillSrc));
5556 if (Subtarget.hasZeroCycleRegMoveGPR64())
5557 ++NumZCRegMoveInstrsGPR;
5558 }
5559 return;
5560 }
5561
5562 // GPR64 zeroing
5563 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5564 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5565 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5566 .addImm(0)
5567 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5568 ++NumZCZeroingInstrsGPR;
5569 } else {
5570 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5571 .addReg(AArch64::XZR)
5572 .addReg(AArch64::XZR);
5573 }
5574 return;
5575 }
5576
5577 // Copy a Predicate register by ORRing with itself.
5578 if (AArch64::PPRRegClass.contains(DestReg) &&
5579 AArch64::PPRRegClass.contains(SrcReg)) {
5580 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5581 "Unexpected SVE register.");
5582 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5583 .addReg(SrcReg) // Pg
5584 .addReg(SrcReg)
5585 .addReg(SrcReg, getKillRegState(KillSrc));
5586 return;
5587 }
5588
5589 // Copy a predicate-as-counter register by ORRing with itself as if it
5590 // were a regular predicate (mask) register.
5591 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5592 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5593 if (DestIsPNR || SrcIsPNR) {
5594 auto ToPPR = [](MCRegister R) -> MCRegister {
5595 return (R - AArch64::PN0) + AArch64::P0;
5596 };
5597 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5598 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5599
5600 if (PPRSrcReg != PPRDestReg) {
5601 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5602 .addReg(PPRSrcReg) // Pg
5603 .addReg(PPRSrcReg)
5604 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5605 if (DestIsPNR)
5606 NewMI.addDef(DestReg, RegState::Implicit);
5607 }
5608 return;
5609 }
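// E.g. (sketch): $pn8 = COPY $pn9 is expanded on the underlying predicate
// registers as
//   $p8 = ORR_PPzPP $p9, $p9, $p9, implicit-def $pn8
// where the implicit def keeps liveness of the PNR destination correct.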
5610
5611 // Copy a Z register by ORRing with itself.
5612 if (AArch64::ZPRRegClass.contains(DestReg) &&
5613 AArch64::ZPRRegClass.contains(SrcReg)) {
5614 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5615 "Unexpected SVE register.");
5616 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5617 .addReg(SrcReg)
5618 .addReg(SrcReg, getKillRegState(KillSrc));
5619 return;
5620 }
5621
5622 // Copy a Z register pair by copying the individual sub-registers.
5623 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5624 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5625 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5626 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5627 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5628 "Unexpected SVE register.");
5629 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5630 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5631 Indices);
5632 return;
5633 }
5634
5635 // Copy a Z register triple by copying the individual sub-registers.
5636 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5637 AArch64::ZPR3RegClass.contains(SrcReg)) {
5638 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5639 "Unexpected SVE register.");
5640 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5641 AArch64::zsub2};
5642 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5643 Indices);
5644 return;
5645 }
5646
5647 // Copy a Z register quad by copying the individual sub-registers.
5648 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5649 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5650 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5651 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5652 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5653 "Unexpected SVE register.");
5654 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5655 AArch64::zsub2, AArch64::zsub3};
5656 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5657 Indices);
5658 return;
5659 }
5660
5661 // Copy a DDDD register quad by copying the individual sub-registers.
5662 if (AArch64::DDDDRegClass.contains(DestReg) &&
5663 AArch64::DDDDRegClass.contains(SrcReg)) {
5664 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5665 AArch64::dsub2, AArch64::dsub3};
5666 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5667 Indices);
5668 return;
5669 }
5670
5671 // Copy a DDD register triple by copying the individual sub-registers.
5672 if (AArch64::DDDRegClass.contains(DestReg) &&
5673 AArch64::DDDRegClass.contains(SrcReg)) {
5674 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5675 AArch64::dsub2};
5676 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5677 Indices);
5678 return;
5679 }
5680
5681 // Copy a DD register pair by copying the individual sub-registers.
5682 if (AArch64::DDRegClass.contains(DestReg) &&
5683 AArch64::DDRegClass.contains(SrcReg)) {
5684 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5685 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5686 Indices);
5687 return;
5688 }
5689
5690 // Copy a QQQQ register quad by copying the individual sub-registers.
5691 if (AArch64::QQQQRegClass.contains(DestReg) &&
5692 AArch64::QQQQRegClass.contains(SrcReg)) {
5693 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5694 AArch64::qsub2, AArch64::qsub3};
5695 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5696 Indices);
5697 return;
5698 }
5699
5700 // Copy a QQQ register triple by copying the individual sub-registers.
5701 if (AArch64::QQQRegClass.contains(DestReg) &&
5702 AArch64::QQQRegClass.contains(SrcReg)) {
5703 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5704 AArch64::qsub2};
5705 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5706 Indices);
5707 return;
5708 }
5709
5710 // Copy a QQ register pair by copying the individual sub-registers.
5711 if (AArch64::QQRegClass.contains(DestReg) &&
5712 AArch64::QQRegClass.contains(SrcReg)) {
5713 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5714 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5715 Indices);
5716 return;
5717 }
5718
5719 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5720 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5721 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5722 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5723 AArch64::XZR, Indices);
5724 return;
5725 }
5726
5727 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5728 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5729 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5730 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5731 AArch64::WZR, Indices);
5732 return;
5733 }
5734
5735 if (AArch64::FPR128RegClass.contains(DestReg) &&
5736 AArch64::FPR128RegClass.contains(SrcReg)) {
5737 // In streaming mode, NEON is illegal but streaming SVE is available, so
5738 // use an SVE ORR for the copy whenever NEON cannot be used at this point.
5739 // With +sme-fa64, NEON remains legal in streaming mode and can be used.
5740 if ((Subtarget.isSVEorStreamingSVEAvailable() &&
5741 !Subtarget.isNeonAvailable()) ||
5742 mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5743 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5744 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5745 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5746 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5747 } else if (Subtarget.isNeonAvailable()) {
5748 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5749 .addReg(SrcReg)
5750 .addReg(SrcReg, getKillRegState(KillSrc));
5751 if (Subtarget.hasZeroCycleRegMoveFPR128())
5752 ++NumZCRegMoveInstrsFPR;
5753 } else {
5754 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5755 .addReg(AArch64::SP, RegState::Define)
5756 .addReg(SrcReg, getKillRegState(KillSrc))
5757 .addReg(AArch64::SP)
5758 .addImm(-16);
5759 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5760 .addReg(AArch64::SP, RegState::Define)
5761 .addReg(DestReg, RegState::Define)
5762 .addReg(AArch64::SP)
5763 .addImm(16);
5764 }
5765 return;
5766 }
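// A sketch of the three q-register copy strategies above for $q0 = COPY $q1:
//   streaming SVE:  $z0 = ORR_ZZZ $z1, $z1
//   NEON:           $q0 = ORRv16i8 $q1, $q1
//   neither:        store $q1 to [sp, #-16]! then reload $q0 from [sp], #16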
5767
5768 if (AArch64::FPR64RegClass.contains(DestReg) &&
5769 AArch64::FPR64RegClass.contains(SrcReg)) {
5770 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5771 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5772 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5773 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5774 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5775 &AArch64::FPR128RegClass);
5776 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5777 &AArch64::FPR128RegClass);
5778 // This instruction is reading and writing Q registers. This may upset
5779 // the register scavenger and machine verifier, so we need to indicate
5780 // that we are reading an undefined value from SrcRegQ, but a proper
5781 // value from SrcReg.
5782 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5783 .addReg(SrcRegQ, RegState::Undef)
5784 .addReg(SrcRegQ, RegState::Undef)
5785 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5786 ++NumZCRegMoveInstrsFPR;
5787 } else {
5788 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5789 .addReg(SrcReg, getKillRegState(KillSrc));
5790 if (Subtarget.hasZeroCycleRegMoveFPR64())
5791 ++NumZCRegMoveInstrsFPR;
5792 }
5793 return;
5794 }
5795
5796 if (AArch64::FPR32RegClass.contains(DestReg) &&
5797 AArch64::FPR32RegClass.contains(SrcReg)) {
5798 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5799 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5800 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5801 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5802 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5803 &AArch64::FPR128RegClass);
5804 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5805 &AArch64::FPR128RegClass);
5806 // This instruction is reading and writing Q registers. This may upset
5807 // the register scavenger and machine verifier, so we need to indicate
5808 // that we are reading an undefined value from SrcRegQ, but a proper
5809 // value from SrcReg.
5810 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5811 .addReg(SrcRegQ, RegState::Undef)
5812 .addReg(SrcRegQ, RegState::Undef)
5813 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5814 ++NumZCRegMoveInstrsFPR;
5815 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5816 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5817 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5818 &AArch64::FPR64RegClass);
5819 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5820 &AArch64::FPR64RegClass);
5821 // This instruction is reading and writing D registers. This may upset
5822 // the register scavenger and machine verifier, so we need to indicate
5823 // that we are reading an undefined value from SrcRegD, but a proper
5824 // value from SrcReg.
5825 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5826 .addReg(SrcRegD, RegState::Undef)
5827 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5828 ++NumZCRegMoveInstrsFPR;
5829 } else {
5830 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5831 .addReg(SrcReg, getKillRegState(KillSrc));
5832 if (Subtarget.hasZeroCycleRegMoveFPR32())
5833 ++NumZCRegMoveInstrsFPR;
5834 }
5835 return;
5836 }
5837
5838 if (AArch64::FPR16RegClass.contains(DestReg) &&
5839 AArch64::FPR16RegClass.contains(SrcReg)) {
5840 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5841 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5842 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5843 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5844 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5845 &AArch64::FPR128RegClass);
5846 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5847 &AArch64::FPR128RegClass);
5848 // This instruction is reading and writing Q registers. This may upset
5849 // the register scavenger and machine verifier, so we need to indicate
5850 // that we are reading an undefined value from SrcRegQ, but a proper
5851 // value from SrcReg.
5852 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5853 .addReg(SrcRegQ, RegState::Undef)
5854 .addReg(SrcRegQ, RegState::Undef)
5855 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5856 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5857 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5858 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5859 &AArch64::FPR64RegClass);
5860 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5861 &AArch64::FPR64RegClass);
5862 // This instruction is reading and writing D registers. This may upset
5863 // the register scavenger and machine verifier, so we need to indicate
5864 // that we are reading an undefined value from SrcRegD, but a proper
5865 // value from SrcReg.
5866 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5867 .addReg(SrcRegD, RegState::Undef)
5868 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5869 } else {
5870 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5871 &AArch64::FPR32RegClass);
5872 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5873 &AArch64::FPR32RegClass);
5874 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5875 .addReg(SrcReg, getKillRegState(KillSrc));
5876 }
5877 return;
5878 }
5879
5880 if (AArch64::FPR8RegClass.contains(DestReg) &&
5881 AArch64::FPR8RegClass.contains(SrcReg)) {
5882 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5883 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5884 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable() &&
5885 !mustAvoidNeonAtMBBI(Subtarget, MBB, I)) {
5886 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5887 &AArch64::FPR128RegClass);
5888 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5889 &AArch64::FPR128RegClass);
5890 // This instruction is reading and writing Q registers. This may upset
5891 // the register scavenger and machine verifier, so we need to indicate
5892 // that we are reading an undefined value from SrcRegQ, but a proper
5893 // value from SrcReg.
5894 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5895 .addReg(SrcRegQ, RegState::Undef)
5896 .addReg(SrcRegQ, RegState::Undef)
5897 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5898 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5899 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5900 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5901 &AArch64::FPR64RegClass);
5902 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5903 &AArch64::FPR64RegClass);
5904 // This instruction is reading and writing D registers. This may upset
5905 // the register scavenger and machine verifier, so we need to indicate
5906 // that we are reading an undefined value from SrcRegD, but a proper
5907 // value from SrcReg.
5908 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5909 .addReg(SrcRegD, RegState::Undef)
5910 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5911 } else {
5912 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5913 &AArch64::FPR32RegClass);
5914 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5915 &AArch64::FPR32RegClass);
5916 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5917 .addReg(SrcReg, getKillRegState(KillSrc));
5918 }
5919 return;
5920 }
5921
5922 // Copies between GPR64 and FPR64.
5923 if (AArch64::FPR64RegClass.contains(DestReg) &&
5924 AArch64::GPR64RegClass.contains(SrcReg)) {
5925 if (AArch64::XZR == SrcReg) {
5926 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5927 } else {
5928 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5929 .addReg(SrcReg, getKillRegState(KillSrc));
5930 }
5931 return;
5932 }
5933 if (AArch64::GPR64RegClass.contains(DestReg) &&
5934 AArch64::FPR64RegClass.contains(SrcReg)) {
5935 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5936 .addReg(SrcReg, getKillRegState(KillSrc));
5937 return;
5938 }
5939 // Copies between GPR32 and FPR32.
5940 if (AArch64::FPR32RegClass.contains(DestReg) &&
5941 AArch64::GPR32RegClass.contains(SrcReg)) {
5942 if (AArch64::WZR == SrcReg) {
5943 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5944 } else {
5945 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5946 .addReg(SrcReg, getKillRegState(KillSrc));
5947 }
5948 return;
5949 }
5950 if (AArch64::GPR32RegClass.contains(DestReg) &&
5951 AArch64::FPR32RegClass.contains(SrcReg)) {
5952 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5953 .addReg(SrcReg, getKillRegState(KillSrc));
5954 return;
5955 }
5956
5957 if (DestReg == AArch64::NZCV) {
5958 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5959 BuildMI(MBB, I, DL, get(AArch64::MSR))
5960 .addImm(AArch64SysReg::NZCV)
5961 .addReg(SrcReg, getKillRegState(KillSrc))
5962 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5963 return;
5964 }
5965
5966 if (SrcReg == AArch64::NZCV) {
5967 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5968 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5969 .addImm(AArch64SysReg::NZCV)
5970 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5971 return;
5972 }
5973
5974#ifndef NDEBUG
5975 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5976 << "\n";
5977#endif
5978 llvm_unreachable("unimplemented reg-to-reg copy");
5979}
5980
5981static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5982 MachineBasicBlock &MBB,
5983 MachineBasicBlock::iterator InsertBefore,
5984 const MCInstrDesc &MCID,
5985 Register SrcReg, bool IsKill,
5986 unsigned SubIdx0, unsigned SubIdx1, int FI,
5987 MachineMemOperand *MMO) {
5988 Register SrcReg0 = SrcReg;
5989 Register SrcReg1 = SrcReg;
5990 if (SrcReg.isPhysical()) {
5991 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5992 SubIdx0 = 0;
5993 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5994 SubIdx1 = 0;
5995 }
5996 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5997 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5998 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5999 .addFrameIndex(FI)
6000 .addImm(0)
6001 .addMemOperand(MMO);
6002}
6003
6004void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
6005 MachineBasicBlock::iterator MBBI,
6006 Register SrcReg, bool isKill, int FI,
6007 const TargetRegisterClass *RC,
6008 Register VReg,
6009 MachineInstr::MIFlag Flags) const {
6010 MachineFunction &MF = *MBB.getParent();
6011 MachineFrameInfo &MFI = MF.getFrameInfo();
6012
6013 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
6014 MachineMemOperand *MMO =
6015 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
6016 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6017 unsigned Opc = 0;
6018 bool Offset = true;
6019 Register PNRReg = MCRegister::NoRegister;
6020 unsigned StackID = TargetStackID::Default;
6021 switch (RI.getSpillSize(*RC)) {
6022 case 1:
6023 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6024 Opc = AArch64::STRBui;
6025 break;
6026 case 2: {
6027 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6028 Opc = AArch64::STRHui;
6029 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
6030 AArch64::PPRRegClass.hasSubClassEq(RC)) {
6031 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6032 "Unexpected register store without SVE store instructions");
6033 Opc = AArch64::STR_PXI;
6035 }
6036 break;
6037 }
6038 case 4:
6039 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6040 Opc = AArch64::STRWui;
6041 if (SrcReg.isVirtual())
6042 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
6043 else
6044 assert(SrcReg != AArch64::WSP);
6045 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6046 Opc = AArch64::STRSui;
6047 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6048 Opc = AArch64::STR_PPXI;
6050 }
6051 break;
6052 case 8:
6053 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6054 Opc = AArch64::STRXui;
6055 if (SrcReg.isVirtual())
6056 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6057 else
6058 assert(SrcReg != AArch64::SP);
6059 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6060 Opc = AArch64::STRDui;
6061 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6063 get(AArch64::STPWi), SrcReg, isKill,
6064 AArch64::sube32, AArch64::subo32, FI, MMO);
6065 return;
6066 }
6067 break;
6068 case 16:
6069 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6070 Opc = AArch64::STRQui;
6071 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6072 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6073 Opc = AArch64::ST1Twov1d;
6074 Offset = false;
6075 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6077 get(AArch64::STPXi), SrcReg, isKill,
6078 AArch64::sube64, AArch64::subo64, FI, MMO);
6079 return;
6080 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6081 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6082 "Unexpected register store without SVE store instructions");
6083 Opc = AArch64::STR_ZXI;
6085 }
6086 break;
6087 case 24:
6088 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6089 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6090 Opc = AArch64::ST1Threev1d;
6091 Offset = false;
6092 }
6093 break;
6094 case 32:
6095 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6096 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6097 Opc = AArch64::ST1Fourv1d;
6098 Offset = false;
6099 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6100 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6101 Opc = AArch64::ST1Twov2d;
6102 Offset = false;
6103 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6104 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6105 "Unexpected register store without SVE store instructions");
6106 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
6108 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6109 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6110 "Unexpected register store without SVE store instructions");
6111 Opc = AArch64::STR_ZZXI;
6113 }
6114 break;
6115 case 48:
6116 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6117 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6118 Opc = AArch64::ST1Threev2d;
6119 Offset = false;
6120 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6121 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6122 "Unexpected register store without SVE store instructions");
6123 Opc = AArch64::STR_ZZZXI;
6125 }
6126 break;
6127 case 64:
6128 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6129 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
6130 Opc = AArch64::ST1Fourv2d;
6131 Offset = false;
6132 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6133 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6134 "Unexpected register store without SVE store instructions");
6135 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
6137 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6138 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6139 "Unexpected register store without SVE store instructions");
6140 Opc = AArch64::STR_ZZZZXI;
6142 }
6143 break;
6144 }
6145 assert(Opc && "Unknown register class");
6146 MFI.setStackID(FI, StackID);
6147
6148 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
6149 .addReg(SrcReg, getKillRegState(isKill))
6150 .addFrameIndex(FI);
6151
6152 if (Offset)
6153 MI.addImm(0);
6154 if (PNRReg.isValid())
6155 MI.addDef(PNRReg, RegState::Implicit);
6156 MI.addMemOperand(MMO);
6157}
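// For example (sketch): a GPR64 spill selects STRXui, an FPR128 spill selects
// STRQui, and a single ZPR spill selects STR_ZXI, typically with the frame
// index assigned a scalable stack ID so its offset is scaled by the vector
// length during frame lowering.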
6158
6159static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
6160 MachineBasicBlock &MBB,
6161 MachineBasicBlock::iterator InsertBefore,
6162 const MCInstrDesc &MCID,
6163 Register DestReg, unsigned SubIdx0,
6164 unsigned SubIdx1, int FI,
6165 MachineMemOperand *MMO) {
6166 Register DestReg0 = DestReg;
6167 Register DestReg1 = DestReg;
6168 bool IsUndef = true;
6169 if (DestReg.isPhysical()) {
6170 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6171 SubIdx0 = 0;
6172 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6173 SubIdx1 = 0;
6174 IsUndef = false;
6175 }
6176 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6177 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6178 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6179 .addFrameIndex(FI)
6180 .addImm(0)
6181 .addMemOperand(MMO);
6182}
6183
6184void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
6185 MachineBasicBlock::iterator MBBI,
6186 Register DestReg, int FI,
6187 const TargetRegisterClass *RC,
6188 Register VReg, unsigned SubReg,
6189 MachineInstr::MIFlag Flags) const {
6190 MachineFunction &MF = *MBB.getParent();
6191 MachineFrameInfo &MFI = MF.getFrameInfo();
6192 const TargetRegisterInfo &TRI = getRegisterInfo();
6193 MachineMemOperand *MMO =
6194 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
6195 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6196
6197 unsigned Opc = 0;
6198 bool Offset = true;
6199 unsigned StackID = TargetStackID::Default;
6200 Register PNRReg = MCRegister::NoRegister;
6201 switch (TRI.getSpillSize(*RC)) {
6202 case 1:
6203 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6204 Opc = AArch64::LDRBui;
6205 break;
6206 case 2: {
6207 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6208 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6209 Opc = AArch64::LDRHui;
6210 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6211 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6212 "Unexpected register load without SVE load instructions");
6213 if (IsPNR)
6214 PNRReg = DestReg;
6215 Opc = AArch64::LDR_PXI;
6217 }
6218 break;
6219 }
6220 case 4:
6221 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6222 Opc = AArch64::LDRWui;
6223 if (DestReg.isVirtual())
6224 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6225 else
6226 assert(DestReg != AArch64::WSP);
6227 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6228 Opc = AArch64::LDRSui;
6229 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6230 Opc = AArch64::LDR_PPXI;
6232 }
6233 break;
6234 case 8:
6235 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6236 Opc = AArch64::LDRXui;
6237 if (DestReg.isVirtual())
6238 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6239 else
6240 assert(DestReg != AArch64::SP);
6241 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6242 Opc = AArch64::LDRDui;
6243 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6245 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6246 AArch64::subo32, FI, MMO);
6247 return;
6248 }
6249 break;
6250 case 16:
6251 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6252 Opc = AArch64::LDRQui;
6253 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6254 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6255 Opc = AArch64::LD1Twov1d;
6256 Offset = false;
6257 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6259 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6260 AArch64::subo64, FI, MMO);
6261 return;
6262 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6263 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6264 "Unexpected register load without SVE load instructions");
6265 Opc = AArch64::LDR_ZXI;
6267 }
6268 break;
6269 case 24:
6270 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6271 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6272 Opc = AArch64::LD1Threev1d;
6273 Offset = false;
6274 }
6275 break;
6276 case 32:
6277 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6278 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6279 Opc = AArch64::LD1Fourv1d;
6280 Offset = false;
6281 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6282 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6283 Opc = AArch64::LD1Twov2d;
6284 Offset = false;
6285 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6286 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6287 "Unexpected register load without SVE load instructions");
6288 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6290 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6291 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6292 "Unexpected register load without SVE load instructions");
6293 Opc = AArch64::LDR_ZZXI;
6295 }
6296 break;
6297 case 48:
6298 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6299 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6300 Opc = AArch64::LD1Threev2d;
6301 Offset = false;
6302 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6303 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6304 "Unexpected register load without SVE load instructions");
6305 Opc = AArch64::LDR_ZZZXI;
6307 }
6308 break;
6309 case 64:
6310 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6311 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6312 Opc = AArch64::LD1Fourv2d;
6313 Offset = false;
6314 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6315 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6316 "Unexpected register load without SVE load instructions");
6317 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6319 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6320 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6321 "Unexpected register load without SVE load instructions");
6322 Opc = AArch64::LDR_ZZZZXI;
6324 }
6325 break;
6326 }
6327
6328 assert(Opc && "Unknown register class");
6329 MFI.setStackID(FI, StackID);
6330
6331 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
6332 .addReg(DestReg, getDefRegState(true))
6333 .addFrameIndex(FI);
6334 if (Offset)
6335 MI.addImm(0);
6336 if (PNRReg.isValid() && !PNRReg.isVirtual())
6337 MI.addDef(PNRReg, RegState::Implicit);
6338 MI.addMemOperand(MMO);
6339}
6340
6342 const MachineInstr &UseMI,
6343 const TargetRegisterInfo *TRI) {
6344 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6345 UseMI.getIterator()),
6346 [TRI](const MachineInstr &I) {
6347 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6348 I.readsRegister(AArch64::NZCV, TRI);
6349 });
6350}
6351
6352void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6353 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6354 // The smallest scalable elements supported by scaled SVE addressing
6355 // modes are predicates, which are 2 scalable bytes in size. So the scalable
6356 // byte offset must always be a multiple of 2.
6357 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6358
6359 // VGSized offsets are divided by '2', because the VG register holds the
6360 // number of 64-bit granules rather than 128-bit vector chunks, which is
6361 // how the 'n' in e.g. MVT::nxv1i8 is modelled.
6362 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6363 // VG = n * 2 and the DWARF offset must be VG * 8 bytes.
6364 ByteSized = Offset.getFixed();
6365 VGSized = Offset.getScalable() / 2;
6366}
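// Worked example: StackOffset::get(/*Fixed=*/16, /*Scalable=*/32), i.e. 16
// bytes plus two SVE vectors, yields ByteSized = 16 and VGSized = 16; the
// DWARF expression then adds VG * 16, which equals two vector lengths because
// VG counts 8-byte granules.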
6367
6368/// Returns the offset in parts to which this frame offset can be
6369/// decomposed for the purpose of describing a frame offset.
6370/// For non-scalable offsets this is simply its byte size.
6371void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6372 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6373 int64_t &NumDataVectors) {
6374 // The smallest scalable elements supported by scaled SVE addressing
6375 // modes are predicates, which are 2 scalable bytes in size. So the scalable
6376 // byte offset must always be a multiple of 2.
6377 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6378
6379 NumBytes = Offset.getFixed();
6380 NumDataVectors = 0;
6381 NumPredicateVectors = Offset.getScalable() / 2;
6382 // This method is used to get the offsets to adjust the frame offset.
6383 // If the function requires ADDPL to be used and needs more than two ADDPL
6384 // instructions, part of the offset is folded into NumDataVectors so that it
6385 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
6386 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6387 NumPredicateVectors > 62) {
6388 NumDataVectors = NumPredicateVectors / 8;
6389 NumPredicateVectors -= NumDataVectors * 8;
6390 }
6391}
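// Worked example: a scalable offset of 144 bytes gives
// NumPredicateVectors = 72; since 72 % 8 == 0 it is refolded into
// NumDataVectors = 9 and NumPredicateVectors = 0, so the adjustment can be
// emitted as one ADDVL instead of several ADDPLs.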
6392
6393// Convenience function to create a DWARF expression for: Constant `Operation`.
6394// This helper emits compact sequences for common cases. For example, for
6395// `-15 DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6396static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Constant,
6397 dwarf::LocationAtom Operation) {
6398 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6399 // -Constant (1 to 31)
6400 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6401 Operation = dwarf::DW_OP_minus;
6402 } else if (Constant >= 0 && Constant <= 31) {
6403 // Literal value 0 to 31
6404 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6405 } else {
6406 // Signed constant
6407 Expr.push_back(dwarf::DW_OP_consts);
6408 appendLEB128<LEB128Sign::Signed>(Expr, Constant);
6409 }
6410 return Expr.push_back(Operation);
6411}
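// E.g. appendConstantExpr(Expr, -15, DW_OP_plus) appends
// {DW_OP_lit15, DW_OP_minus}, while appendConstantExpr(Expr, 100, DW_OP_mul)
// appends {DW_OP_consts, SLEB128(100), DW_OP_mul}.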
6412
6413// Convenience function to create a DWARF expression for a register.
6414static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6415 Expr.push_back((char)dwarf::DW_OP_bregx);
6416 appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
6417 Expr.push_back(0);
6418}
6419
6420// Convenience function to create a DWARF expression for loading a register from
6421 // an offset relative to the CFA.
6422static void appendLoadRegExpr(SmallVectorImpl<char> &Expr,
6423 int64_t OffsetFromDefCFA) {
6424 // This assumes the top of the DWARF stack contains the CFA.
6425 Expr.push_back(dwarf::DW_OP_dup);
6426 // Add the offset to the register.
6427 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6428 // Dereference the address (loads a 64-bit value).
6429 Expr.push_back(dwarf::DW_OP_deref);
6430}
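// E.g. appendLoadRegExpr(Expr, -16) appends
// {DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref}: duplicate the CFA on
// the expression stack, subtract 16, and load the 64-bit value saved there.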
6431
6432// Convenience function to create a comment for
6433// (+/-) NumBytes (* RegScale)?
6434static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6435 StringRef RegScale = {}) {
6436 if (NumBytes) {
6437 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6438 if (!RegScale.empty())
6439 Comment << ' ' << RegScale;
6440 }
6441}
6442
6443// Creates an MCCFIInstruction:
6444// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6445static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
6446 unsigned Reg,
6447 const StackOffset &Offset) {
6448 int64_t NumBytes, NumVGScaledBytes;
6449 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6450 NumVGScaledBytes);
6451 std::string CommentBuffer;
6452 llvm::raw_string_ostream Comment(CommentBuffer);
6453
6454 if (Reg == AArch64::SP)
6455 Comment << "sp";
6456 else if (Reg == AArch64::FP)
6457 Comment << "fp";
6458 else
6459 Comment << printReg(Reg, &TRI);
6460
6461 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6462 SmallString<64> Expr;
6463 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6464 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6465 // Reg + NumBytes
6466 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6467 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6468 appendOffsetComment(NumBytes, Comment);
6469 if (NumVGScaledBytes) {
6470 // + VG * NumVGScaledBytes
6471 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6472 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6473 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6474 Expr.push_back(dwarf::DW_OP_plus);
6475 }
6476
6477 // Wrap this into DW_CFA_def_cfa.
6478 SmallString<64> DefCfaExpr;
6479 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6480 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6481 DefCfaExpr.append(Expr.str());
6482 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6483 Comment.str());
6484}
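// For a CFA of "sp + 16 + 24 * VG" this emits an escape whose expression is
// roughly {DW_OP_breg31, SLEB(16), DW_OP_bregx, ULEB(46), SLEB(0),
// DW_OP_lit24, DW_OP_mul, DW_OP_plus} (46 being AArch64's DWARF number for
// VG), wrapped in DW_CFA_def_cfa_expression; a sketch, not byte-exact.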
6485
6486MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
6487 unsigned FrameReg, unsigned Reg,
6488 const StackOffset &Offset,
6489 bool LastAdjustmentWasScalable) {
6490 if (Offset.getScalable())
6491 return createDefCFAExpression(TRI, Reg, Offset);
6492
6493 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6494 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6495
6496 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6497 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6498}
6499
6500MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
6501 unsigned Reg,
6502 const StackOffset &OffsetFromDefCFA,
6503 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6504 int64_t NumBytes, NumVGScaledBytes;
6505 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6506 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6507
6508 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6509
6510 // Non-scalable offsets can use DW_CFA_offset directly.
6511 if (!NumVGScaledBytes)
6512 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6513
6514 std::string CommentBuffer;
6515 llvm::raw_string_ostream Comment(CommentBuffer);
6516 Comment << printReg(Reg, &TRI) << " @ cfa";
6517
6518 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6519 assert(NumVGScaledBytes && "Expected scalable offset");
6520 SmallString<64> OffsetExpr;
6521 // + VG * NumVGScaledBytes
6522 StringRef VGRegScale;
6523 if (IncomingVGOffsetFromDefCFA) {
6524 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6525 VGRegScale = "* IncomingVG";
6526 } else {
6527 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6528 VGRegScale = "* VG";
6529 }
6530 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6531 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6532 OffsetExpr.push_back(dwarf::DW_OP_plus);
6533 if (NumBytes) {
6534 // + NumBytes
6535 appendOffsetComment(NumBytes, Comment);
6536 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6537 }
6538
6539 // Wrap this into DW_CFA_expression
6540 SmallString<64> CfaExpr;
6541 CfaExpr.push_back(dwarf::DW_CFA_expression);
6542 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6543 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6544 CfaExpr.append(OffsetExpr.str());
6545
6546 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6547 Comment.str());
6548}
6549
6550// Helper function to emit a frame offset adjustment from a given
6551// pointer (SrcReg) into DestReg. Unlike emitFrameOffset below, the caller
6552// must supply the exact add/sub opcode to use.
6553static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
6554 MachineBasicBlock::iterator MBBI,
6555 const DebugLoc &DL, unsigned DestReg,
6556 unsigned SrcReg, int64_t Offset, unsigned Opc,
6557 const TargetInstrInfo *TII,
6558 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6559 bool *HasWinCFI, bool EmitCFAOffset,
6560 StackOffset CFAOffset, unsigned FrameReg) {
6561 int Sign = 1;
6562 unsigned MaxEncoding, ShiftSize;
6563 switch (Opc) {
6564 case AArch64::ADDXri:
6565 case AArch64::ADDSXri:
6566 case AArch64::SUBXri:
6567 case AArch64::SUBSXri:
6568 MaxEncoding = 0xfff;
6569 ShiftSize = 12;
6570 break;
6571 case AArch64::ADDVL_XXI:
6572 case AArch64::ADDPL_XXI:
6573 case AArch64::ADDSVL_XXI:
6574 case AArch64::ADDSPL_XXI:
6575 MaxEncoding = 31;
6576 ShiftSize = 0;
6577 if (Offset < 0) {
6578 MaxEncoding = 32;
6579 Sign = -1;
6580 Offset = -Offset;
6581 }
6582 break;
6583 default:
6584 llvm_unreachable("Unsupported opcode");
6585 }
6586
6587 // `Offset` can be in bytes or in "scalable bytes".
6588 int VScale = 1;
6589 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6590 VScale = 16;
6591 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6592 VScale = 2;
6593
6594 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6595 // scratch register. If DestReg is a virtual register, use it as the
6596 // scratch register; otherwise, create a new virtual register (to be
6597 // replaced by the scavenger at the end of PEI). That case can be optimized
6598 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6599 // register can be loaded with offset%8 and the add/sub can use an extending
6600 // instruction with LSL#3.
6601 // Currently the function handles any offsets but generates a poor sequence
6602 // of code.
6603 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6604
6605 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6606 Register TmpReg = DestReg;
6607 if (TmpReg == AArch64::XZR)
6608 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6609 &AArch64::GPR64RegClass);
6610 do {
6611 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6612 unsigned LocalShiftSize = 0;
6613 if (ThisVal > MaxEncoding) {
6614 ThisVal = ThisVal >> ShiftSize;
6615 LocalShiftSize = ShiftSize;
6616 }
6617 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6618 "Encoding cannot handle value that big");
6619
6620 Offset -= ThisVal << LocalShiftSize;
6621 if (Offset == 0)
6622 TmpReg = DestReg;
6623 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6624 .addReg(SrcReg)
6625 .addImm(Sign * (int)ThisVal);
6626 if (ShiftSize)
6627 MBI = MBI.addImm(
6628 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
6629 MBI = MBI.setMIFlag(Flag);
6630
6631 auto Change =
6632 VScale == 1
6633 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6634 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6635 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6636 CFAOffset += Change;
6637 else
6638 CFAOffset -= Change;
6639 if (EmitCFAOffset && DestReg == TmpReg) {
6640 MachineFunction &MF = *MBB.getParent();
6641 const TargetSubtargetInfo &STI = MF.getSubtarget();
6642 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6643
6644 unsigned CFIIndex = MF.addFrameInst(
6645 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6646 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6647 .addCFIIndex(CFIIndex)
6648 .setMIFlags(Flag);
6649 }
6650
6651 if (NeedsWinCFI) {
6652 int Imm = (int)(ThisVal << LocalShiftSize);
6653 if (VScale != 1 && DestReg == AArch64::SP) {
6654 if (HasWinCFI)
6655 *HasWinCFI = true;
6656 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6657 .addImm(ThisVal)
6658 .setMIFlag(Flag);
6659 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6660 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6661 assert(VScale == 1 && "Expected non-scalable operation");
6662 if (HasWinCFI)
6663 *HasWinCFI = true;
6664 if (Imm == 0)
6665 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6666 else
6667 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6668 .addImm(Imm)
6669 .setMIFlag(Flag);
6670 assert(Offset == 0 && "Expected remaining offset to be zero to "
6671 "emit a single SEH directive");
6672 } else if (DestReg == AArch64::SP) {
6673 assert(VScale == 1 && "Expected non-scalable operation");
6674 if (HasWinCFI)
6675 *HasWinCFI = true;
6676 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6677 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6678 .addImm(Imm)
6679 .setMIFlag(Flag);
6680 }
6681 }
6682
6683 SrcReg = TmpReg;
6684 } while (Offset);
6685}
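// Worked example (sketch): an adjustment of 4100 bytes with ADDXri takes two
// iterations of the loop above:
//   add xD, xS, #4096        (ADDXri xD, xS, 1, lsl #12), leaving Offset = 4
//   add xD, xD, #4           (ADDXri xD, xD, 4)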
6686
6687void llvm::emitFrameOffset(MachineBasicBlock &MBB,
6688 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
6689 unsigned DestReg, unsigned SrcReg,
6690 StackOffset Offset, const TargetInstrInfo *TII,
6691 MachineInstr::MIFlag Flag, bool SetNZCV,
6692 bool NeedsWinCFI, bool *HasWinCFI,
6693 bool EmitCFAOffset, StackOffset CFAOffset,
6694 unsigned FrameReg) {
6695 // If a function is marked as arm_locally_streaming, then the runtime value of
6696 // vscale in the prologue/epilogue is different from the runtime value of vscale
6697 // in the function's body. To avoid having to consider multiple vscales,
6698 // we can use `addsvl` to allocate any scalable stack-slots, which under
6699 // most circumstances will be only locals, not callee-save slots.
6700 const Function &F = MBB.getParent()->getFunction();
6701 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6702
6703 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6704 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6705 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6706
6707 // Insert ADDSXri for scalable offset at the end.
6708 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6709 if (NeedsFinalDefNZCV)
6710 SetNZCV = false;
6711
6712 // First emit non-scalable frame offsets, or a simple 'mov'.
6713 if (Bytes || (!Offset && SrcReg != DestReg)) {
6714 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6715 "SP increment/decrement not 8-byte aligned");
6716 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6717 if (Bytes < 0) {
6718 Bytes = -Bytes;
6719 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6720 }
6721 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6722 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6723 FrameReg);
6724 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6725 ? StackOffset::getFixed(-Bytes)
6726 : StackOffset::getFixed(Bytes);
6727 SrcReg = DestReg;
6728 FrameReg = DestReg;
6729 }
6730
6731 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6732 "WinCFI can't allocate fractions of an SVE data vector");
6733
6734 if (NumDataVectors) {
6735 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6736 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6737 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6738 FrameReg);
6739 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6740 SrcReg = DestReg;
6741 }
6742
6743 if (NumPredicateVectors) {
6744 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6745 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6746 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6747 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6748 FrameReg);
6749 }
6750
6751 if (NeedsFinalDefNZCV)
6752 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6753 .addReg(DestReg)
6754 .addImm(0)
6755 .addImm(0);
6756}
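// Worked example (sketch): allocating 32 fixed bytes plus three SVE vectors
// below sp, i.e. Offset = getFixed(-32) + getScalable(-48), emits
//   sub sp, sp, #32          (SUBXri)
//   addvl sp, sp, #-3        (ADDVL_XXI)
// because -48 scalable bytes decompose to NumDataVectors = -3 with no
// residual predicate-sized part.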
6757
6758MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
6759 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
6760 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6761 LiveIntervals *LIS, VirtRegMap *VRM) const {
6762 // This is a bit of a hack. Consider this instruction:
6763 //
6764 // %0 = COPY %sp; GPR64all:%0
6765 //
6766 // We explicitly chose GPR64all for the virtual register so such a copy might
6767 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6768 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6769 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6770 //
6771 // To prevent that, we are going to constrain the %0 register class here.
6772 if (MI.isFullCopy()) {
6773 Register DstReg = MI.getOperand(0).getReg();
6774 Register SrcReg = MI.getOperand(1).getReg();
6775 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6776 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6777 return nullptr;
6778 }
6779 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6780 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6781 return nullptr;
6782 }
6783 // Nothing can be folded with a copy from/to NZCV.
6784 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6785 return nullptr;
6786 }
6787
6788 // Handle the case where a copy is being spilled or filled but the source
6789 // and destination register classes don't match. For example:
6790 //
6791 // %0 = COPY %xzr; GPR64common:%0
6792 //
6793 // In this case we can still safely fold away the COPY and generate the
6794 // following spill code:
6795 //
6796 // STRXui %xzr, %stack.0
6797 //
6798 // This also eliminates spilled cross register class COPYs (e.g. between x and
6799 // d regs) of the same size. For example:
6800 //
6801 // %0 = COPY %1; GPR64:%0, FPR64:%1
6802 //
6803 // will be filled as
6804 //
6805 // LDRDui %0, fi<#0>
6806 //
6807 // instead of
6808 //
6809 // LDRXui %Temp, fi<#0>
6810 // %0 = FMOV %Temp
6811 //
6812 if (MI.isCopy() && Ops.size() == 1 &&
6813 // Make sure we're only folding the explicit COPY defs/uses.
6814 (Ops[0] == 0 || Ops[0] == 1)) {
6815 bool IsSpill = Ops[0] == 0;
6816 bool IsFill = !IsSpill;
6817 const TargetRegisterInfo &TRI = getRegisterInfo();
6818 const MachineRegisterInfo &MRI = MF.getRegInfo();
6819 MachineBasicBlock &MBB = *MI.getParent();
6820 const MachineOperand &DstMO = MI.getOperand(0);
6821 const MachineOperand &SrcMO = MI.getOperand(1);
6822 Register DstReg = DstMO.getReg();
6823 Register SrcReg = SrcMO.getReg();
6824 // This is slightly expensive to compute for physical regs since
6825 // getMinimalPhysRegClass is slow.
6826 auto getRegClass = [&](unsigned Reg) {
6827 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6828 : TRI.getMinimalPhysRegClass(Reg);
6829 };
6830
6831 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6832 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6833 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6834 "Mismatched register size in non subreg COPY");
6835 if (IsSpill)
6836 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6837 getRegClass(SrcReg), Register());
6838 else
6839 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6840 getRegClass(DstReg), Register());
6841 return &*--InsertPt;
6842 }
6843
6844 // Handle cases like spilling def of:
6845 //
6846 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6847 //
6848 // where the physical register source can be widened and stored to the full
6849 // virtual reg destination stack slot, in this case producing:
6850 //
6851 // STRXui %xzr, %stack.0
6852 //
6853 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6854 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6855 assert(SrcMO.getSubReg() == 0 &&
6856 "Unexpected subreg on physical register");
6857 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6858 FrameIndex, &AArch64::GPR64RegClass, Register());
6859 return &*--InsertPt;
6860 }
6861
6862 // Handle cases like filling use of:
6863 //
6864 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6865 //
6866 // where we can load the full virtual reg source stack slot, into the subreg
6867 // destination, in this case producing:
6868 //
6869 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6870 //
6871 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6872 const TargetRegisterClass *FillRC = nullptr;
6873 switch (DstMO.getSubReg()) {
6874 default:
6875 break;
6876 case AArch64::sub_32:
6877 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6878 FillRC = &AArch64::GPR32RegClass;
6879 break;
6880 case AArch64::ssub:
6881 FillRC = &AArch64::FPR32RegClass;
6882 break;
6883 case AArch64::dsub:
6884 FillRC = &AArch64::FPR64RegClass;
6885 break;
6886 }
6887
6888 if (FillRC) {
6889 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6890 TRI.getRegSizeInBits(*FillRC) &&
6891 "Mismatched regclass size on folded subreg COPY");
6892 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
6893 Register());
6894 MachineInstr &LoadMI = *--InsertPt;
6895 MachineOperand &LoadDst = LoadMI.getOperand(0);
6896 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6897 LoadDst.setSubReg(DstMO.getSubReg());
6898 LoadDst.setIsUndef();
6899 return &LoadMI;
6900 }
6901 }
6902 }
6903
6904 // Cannot fold.
6905 return nullptr;
6906}
6907
6908int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
6909 StackOffset &SOffset,
6910 bool *OutUseUnscaledOp,
6911 unsigned *OutUnscaledOp,
6912 int64_t *EmittableOffset) {
6913 // Set output values in case of early exit.
6914 if (EmittableOffset)
6915 *EmittableOffset = 0;
6916 if (OutUseUnscaledOp)
6917 *OutUseUnscaledOp = false;
6918 if (OutUnscaledOp)
6919 *OutUnscaledOp = 0;
6920
6921 // Exit early for structured vector spills/fills as they can't take an
6922 // immediate offset.
6923 switch (MI.getOpcode()) {
6924 default:
6925 break;
6926 case AArch64::LD1Rv1d:
6927 case AArch64::LD1Rv2s:
6928 case AArch64::LD1Rv2d:
6929 case AArch64::LD1Rv4h:
6930 case AArch64::LD1Rv4s:
6931 case AArch64::LD1Rv8b:
6932 case AArch64::LD1Rv8h:
6933 case AArch64::LD1Rv16b:
6934 case AArch64::LD1Twov2d:
6935 case AArch64::LD1Threev2d:
6936 case AArch64::LD1Fourv2d:
6937 case AArch64::LD1Twov1d:
6938 case AArch64::LD1Threev1d:
6939 case AArch64::LD1Fourv1d:
6940 case AArch64::ST1Twov2d:
6941 case AArch64::ST1Threev2d:
6942 case AArch64::ST1Fourv2d:
6943 case AArch64::ST1Twov1d:
6944 case AArch64::ST1Threev1d:
6945 case AArch64::ST1Fourv1d:
6946 case AArch64::ST1i8:
6947 case AArch64::ST1i16:
6948 case AArch64::ST1i32:
6949 case AArch64::ST1i64:
6950 case AArch64::IRG:
6951 case AArch64::IRGstack:
6952 case AArch64::STGloop:
6953 case AArch64::STZGloop:
6954 return AArch64FrameOffsetCannotUpdate;
6955 }
6956
6957 // Get the min/max offset and the scale.
6958 TypeSize ScaleValue(0U, false), Width(0U, false);
6959 int64_t MinOff, MaxOff;
6960 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6961 MaxOff))
6962 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6963
6964 // Construct the complete offset.
6965 bool IsMulVL = ScaleValue.isScalable();
6966 unsigned Scale = ScaleValue.getKnownMinValue();
6967 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6968
6969 const MachineOperand &ImmOpnd =
6970 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6971 Offset += ImmOpnd.getImm() * Scale;
6972
6973 // If the offset is not a multiple of the scale, or if it is negative,
6974 // rewrite the instruction to use the equivalent unscaled instruction
6975 // instead, provided one is available.
6976 std::optional<unsigned> UnscaledOp =
6977 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
6978 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6979 if (useUnscaledOp &&
6980 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6981 MaxOff))
6982 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6983
6984 Scale = ScaleValue.getKnownMinValue();
6985 assert(IsMulVL == ScaleValue.isScalable() &&
6986 "Unscaled opcode has different value for scalable");
6987
6988 int64_t Remainder = Offset % Scale;
6989 assert(!(Remainder && useUnscaledOp) &&
6990 "Cannot have remainder when using unscaled op");
6991
6992 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6993 int64_t NewOffset = Offset / Scale;
6994 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6995 Offset = Remainder;
6996 else {
6997 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6998 Offset = Offset - (NewOffset * Scale);
6999 }
7000
7001 if (EmittableOffset)
7002 *EmittableOffset = NewOffset;
7003 if (OutUseUnscaledOp)
7004 *OutUseUnscaledOp = useUnscaledOp;
7005 if (OutUnscaledOp && UnscaledOp)
7006 *OutUnscaledOp = *UnscaledOp;
7007
7008 if (IsMulVL)
7009 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
7010 else
7011 SOffset = StackOffset::get(Offset, SOffset.getScalable());
7012 return AArch64FrameOffsetCanUpdate |
7013 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
7014}
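// Worked example: for STRXui (scale 8, immediate range 0..4095) and a byte
// offset of 32776, NewOffset becomes the closest encodable value 4095 and the
// residual SOffset is 32776 - 4095 * 8 = 16 bytes, so the result is
// AArch64FrameOffsetCanUpdate without AArch64FrameOffsetIsLegal.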
7015
7016bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
7017 unsigned FrameReg, StackOffset &Offset,
7018 const AArch64InstrInfo *TII) {
7019 unsigned Opcode = MI.getOpcode();
7020 unsigned ImmIdx = FrameRegIdx + 1;
7021
7022 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
7023 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
7024 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
7025 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
7026 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
7027 MI.eraseFromParent();
7028 Offset = StackOffset();
7029 return true;
7030 }
7031
7032 int64_t NewOffset;
7033 unsigned UnscaledOp;
7034 bool UseUnscaledOp;
7035 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
7036 &UnscaledOp, &NewOffset);
7037
7038 if (Status & AArch64FrameOffsetCanUpdate) {
7039 // Replace the FrameIndex with FrameReg.
7040 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
7041 if (UseUnscaledOp)
7042 MI.setDesc(TII->get(UnscaledOp));
7043
7044 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
7045 return !Offset;
7046 }
7047
7048 return false;
7049}
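// E.g. (sketch, operand order abbreviated): "STRXui $x0, %frame-idx, 3" with
// FrameReg = $sp and an extra fixed offset of 16 bytes becomes
// "STRXui $x0, $sp, 5", since 3 + 16 / 8 = 5 and no residual offset remains,
// so the function returns true.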
7050
7056
7057MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); }
7058
7059// AArch64 supports MachineCombiner.
7060bool AArch64InstrInfo::useMachineCombiner() const { return true; }
7061
7062// True when Opc sets the NZCV flags.
7063static bool isCombineInstrSettingFlag(unsigned Opc) {
7064 switch (Opc) {
7065 case AArch64::ADDSWrr:
7066 case AArch64::ADDSWri:
7067 case AArch64::ADDSXrr:
7068 case AArch64::ADDSXri:
7069 case AArch64::SUBSWrr:
7070 case AArch64::SUBSXrr:
7071 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7072 case AArch64::SUBSWri:
7073 case AArch64::SUBSXri:
7074 return true;
7075 default:
7076 break;
7077 }
7078 return false;
7079}
7080
7081// 32b Opcodes that can be combined with a MUL
7082static bool isCombineInstrCandidate32(unsigned Opc) {
7083 switch (Opc) {
7084 case AArch64::ADDWrr:
7085 case AArch64::ADDWri:
7086 case AArch64::SUBWrr:
7087 case AArch64::ADDSWrr:
7088 case AArch64::ADDSWri:
7089 case AArch64::SUBSWrr:
7090 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7091 case AArch64::SUBWri:
7092 case AArch64::SUBSWri:
7093 return true;
7094 default:
7095 break;
7096 }
7097 return false;
7098}
7099
7100// 64b Opcodes that can be combined with a MUL
7101static bool isCombineInstrCandidate64(unsigned Opc) {
7102 switch (Opc) {
7103 case AArch64::ADDXrr:
7104 case AArch64::ADDXri:
7105 case AArch64::SUBXrr:
7106 case AArch64::ADDSXrr:
7107 case AArch64::ADDSXri:
7108 case AArch64::SUBSXrr:
7109 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
7110 case AArch64::SUBXri:
7111 case AArch64::SUBSXri:
7112 case AArch64::ADDv8i8:
7113 case AArch64::ADDv16i8:
7114 case AArch64::ADDv4i16:
7115 case AArch64::ADDv8i16:
7116 case AArch64::ADDv2i32:
7117 case AArch64::ADDv4i32:
7118 case AArch64::SUBv8i8:
7119 case AArch64::SUBv16i8:
7120 case AArch64::SUBv4i16:
7121 case AArch64::SUBv8i16:
7122 case AArch64::SUBv2i32:
7123 case AArch64::SUBv4i32:
7124 return true;
7125 default:
7126 break;
7127 }
7128 return false;
7129}
7130
7131// FP Opcodes that can be combined with a FMUL.
7132static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
7133 switch (Inst.getOpcode()) {
7134 default:
7135 break;
7136 case AArch64::FADDHrr:
7137 case AArch64::FADDSrr:
7138 case AArch64::FADDDrr:
7139 case AArch64::FADDv4f16:
7140 case AArch64::FADDv8f16:
7141 case AArch64::FADDv2f32:
7142 case AArch64::FADDv2f64:
7143 case AArch64::FADDv4f32:
7144 case AArch64::FSUBHrr:
7145 case AArch64::FSUBSrr:
7146 case AArch64::FSUBDrr:
7147 case AArch64::FSUBv4f16:
7148 case AArch64::FSUBv8f16:
7149 case AArch64::FSUBv2f32:
7150 case AArch64::FSUBv2f64:
7151 case AArch64::FSUBv4f32:
7152 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
7153 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7154 // the target options or if FADD/FSUB has the contract fast-math flag.
7155 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7156 Inst.getFlag(MachineInstr::FmContract);
7157 }
7158 return false;
7159}
7160
7161// Opcodes that can be combined with a MUL
7162 static bool isCombineInstrCandidate(unsigned Opc) {
7163 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
7164 }
7165
7166//
7167// Utility routine that checks if \param MO is defined by an
7168// \param CombineOpc instruction in the basic block \param MBB
7169 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
7170 unsigned CombineOpc, unsigned ZeroReg = 0,
7171 bool CheckZeroReg = false) {
7172 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7173 MachineInstr *MI = nullptr;
7174
7175 if (MO.isReg() && MO.getReg().isVirtual())
7176 MI = MRI.getUniqueVRegDef(MO.getReg());
7177 // And it needs to be in the trace (otherwise, it won't have a depth).
7178 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7179 return false;
7180 // Must only be used by the user we combine with.
7181 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7182 return false;
7183
7184 if (CheckZeroReg) {
7185 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7186 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7187 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
7188 // The third input reg must be zero.
7189 if (MI->getOperand(3).getReg() != ZeroReg)
7190 return false;
7191 }
7192
7193 if (isCombineInstrSettingFlag(CombineOpc) &&
7194 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7195 return false;
7196
7197 return true;
7198}
7199
7200//
7201// Is \param MO defined by an integer multiply and can be combined?
7202 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7203 unsigned MulOpc, unsigned ZeroReg) {
7204 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7205}
7206
7207//
7208// Is \param MO defined by a floating-point multiply and can be combined?
7209 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
7210 unsigned MulOpc) {
7211 return canCombine(MBB, MO, MulOpc);
7212}
7213
7214// TODO: There are many more machine instruction opcodes to match:
7215// 1. Other data types (integer, vectors)
7216// 2. Other math / logic operations (xor, or)
7217// 3. Other forms of the same operation (intrinsics and other variants)
7218bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7219 bool Invert) const {
7220 if (Invert)
7221 return false;
7222 switch (Inst.getOpcode()) {
7223 // == Floating-point types ==
7224 // -- Floating-point instructions --
7225 case AArch64::FADDHrr:
7226 case AArch64::FADDSrr:
7227 case AArch64::FADDDrr:
7228 case AArch64::FMULHrr:
7229 case AArch64::FMULSrr:
7230 case AArch64::FMULDrr:
7231 case AArch64::FMULX16:
7232 case AArch64::FMULX32:
7233 case AArch64::FMULX64:
7234 // -- Advanced SIMD instructions --
7235 case AArch64::FADDv4f16:
7236 case AArch64::FADDv8f16:
7237 case AArch64::FADDv2f32:
7238 case AArch64::FADDv4f32:
7239 case AArch64::FADDv2f64:
7240 case AArch64::FMULv4f16:
7241 case AArch64::FMULv8f16:
7242 case AArch64::FMULv2f32:
7243 case AArch64::FMULv4f32:
7244 case AArch64::FMULv2f64:
7245 case AArch64::FMULXv4f16:
7246 case AArch64::FMULXv8f16:
7247 case AArch64::FMULXv2f32:
7248 case AArch64::FMULXv4f32:
7249 case AArch64::FMULXv2f64:
7250 // -- SVE instructions --
7251 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7252 // in the SVE instruction set (though there are predicated ones).
7253 case AArch64::FADD_ZZZ_H:
7254 case AArch64::FADD_ZZZ_S:
7255 case AArch64::FADD_ZZZ_D:
7256 case AArch64::FMUL_ZZZ_H:
7257 case AArch64::FMUL_ZZZ_S:
7258 case AArch64::FMUL_ZZZ_D:
7259 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
7260 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
7261
7262 // == Integer types ==
7263 // -- Base instructions --
7264 // Opcodes MULWrr and MULXrr don't exist because
7265 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7266 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7267 // The machine-combiner does not support three-source-operand machine
7268 // instructions, so we cannot reassociate MULs.
7269 case AArch64::ADDWrr:
7270 case AArch64::ADDXrr:
7271 case AArch64::ANDWrr:
7272 case AArch64::ANDXrr:
7273 case AArch64::ORRWrr:
7274 case AArch64::ORRXrr:
7275 case AArch64::EORWrr:
7276 case AArch64::EORXrr:
7277 case AArch64::EONWrr:
7278 case AArch64::EONXrr:
7279 // -- Advanced SIMD instructions --
7280 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7281 // in the Advanced SIMD instruction set.
7282 case AArch64::ADDv8i8:
7283 case AArch64::ADDv16i8:
7284 case AArch64::ADDv4i16:
7285 case AArch64::ADDv8i16:
7286 case AArch64::ADDv2i32:
7287 case AArch64::ADDv4i32:
7288 case AArch64::ADDv1i64:
7289 case AArch64::ADDv2i64:
7290 case AArch64::MULv8i8:
7291 case AArch64::MULv16i8:
7292 case AArch64::MULv4i16:
7293 case AArch64::MULv8i16:
7294 case AArch64::MULv2i32:
7295 case AArch64::MULv4i32:
7296 case AArch64::ANDv8i8:
7297 case AArch64::ANDv16i8:
7298 case AArch64::ORRv8i8:
7299 case AArch64::ORRv16i8:
7300 case AArch64::EORv8i8:
7301 case AArch64::EORv16i8:
7302 // -- SVE instructions --
7303 case AArch64::ADD_ZZZ_B:
7304 case AArch64::ADD_ZZZ_H:
7305 case AArch64::ADD_ZZZ_S:
7306 case AArch64::ADD_ZZZ_D:
7307 case AArch64::MUL_ZZZ_B:
7308 case AArch64::MUL_ZZZ_H:
7309 case AArch64::MUL_ZZZ_S:
7310 case AArch64::MUL_ZZZ_D:
7311 case AArch64::AND_ZZZ:
7312 case AArch64::ORR_ZZZ:
7313 case AArch64::EOR_ZZZ:
7314 return true;
7315
7316 default:
7317 return false;
7318 }
7319}
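// For example, with the reassoc/nsz fast-math flags (or for the integer
// opcodes above), the generic machine-combiner reassociation can turn the
// serial chain
//   t1 = FADDDrr a, b ; t2 = FADDDrr t1, c ; t3 = FADDDrr t2, d
// into the shallower t1 = a + b ; t2 = c + d ; t3 = t1 + t2, shortening the
// critical path at the cost of one extra live value.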
7320
7321/// Find instructions that can be turned into madd.
7322 static bool getMaddPatterns(MachineInstr &Root,
7323 SmallVectorImpl<unsigned> &Patterns) {
7324 unsigned Opc = Root.getOpcode();
7325 MachineBasicBlock &MBB = *Root.getParent();
7326 bool Found = false;
7327
7328 if (!isCombineInstrCandidate(Opc))
7329 return false;
7330 if (isCombineInstrSettingFlag(Opc)) {
7331 int Cmp_NZCV =
7332 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7333 // When NZCV is live, bail out.
7334 if (Cmp_NZCV == -1)
7335 return false;
7336 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7337 // When the opcode can't change, bail out.
7338 // CHECKME: do we miss any cases for opcode conversion?
7339 if (NewOpc == Opc)
7340 return false;
7341 Opc = NewOpc;
7342 }
7343
7344 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7345 unsigned Pattern) {
7346 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7347 Patterns.push_back(Pattern);
7348 Found = true;
7349 }
7350 };
7351
7352 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7353 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7354 Patterns.push_back(Pattern);
7355 Found = true;
7356 }
7357 };
7358
7359 typedef AArch64MachineCombinerPattern MCP;
7360
7361 switch (Opc) {
7362 default:
7363 break;
7364 case AArch64::ADDWrr:
7365 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7366 "ADDWrr does not have register operands");
7367 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7368 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7369 break;
7370 case AArch64::ADDXrr:
7371 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7372 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7373 break;
7374 case AArch64::SUBWrr:
7375 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7376 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7377 break;
7378 case AArch64::SUBXrr:
7379 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7380 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7381 break;
7382 case AArch64::ADDWri:
7383 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7384 break;
7385 case AArch64::ADDXri:
7386 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7387 break;
7388 case AArch64::SUBWri:
7389 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7390 break;
7391 case AArch64::SUBXri:
7392 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7393 break;
7394 case AArch64::ADDv8i8:
7395 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7396 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7397 break;
7398 case AArch64::ADDv16i8:
7399 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7400 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7401 break;
7402 case AArch64::ADDv4i16:
7403 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7404 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7405 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7406 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7407 break;
7408 case AArch64::ADDv8i16:
7409 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7410 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7411 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7412 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7413 break;
7414 case AArch64::ADDv2i32:
7415 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7416 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7417 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7418 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7419 break;
7420 case AArch64::ADDv4i32:
7421 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7422 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7423 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7424 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7425 break;
7426 case AArch64::SUBv8i8:
7427 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7428 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7429 break;
7430 case AArch64::SUBv16i8:
7431 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7432 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7433 break;
7434 case AArch64::SUBv4i16:
7435 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7436 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7437 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7438 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7439 break;
7440 case AArch64::SUBv8i16:
7441 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7442 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7443 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7444 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7445 break;
7446 case AArch64::SUBv2i32:
7447 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7448 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7449 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7450 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7451 break;
7452 case AArch64::SUBv4i32:
7453 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7454 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7455 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7456 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7457 break;
7458 }
7459 return Found;
7460}
7461
7462bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7463 switch (Opcode) {
7464 default:
7465 break;
7466 case AArch64::UABALB_ZZZ_D:
7467 case AArch64::UABALB_ZZZ_H:
7468 case AArch64::UABALB_ZZZ_S:
7469 case AArch64::UABALT_ZZZ_D:
7470 case AArch64::UABALT_ZZZ_H:
7471 case AArch64::UABALT_ZZZ_S:
7472 case AArch64::SABALB_ZZZ_D:
7473 case AArch64::SABALB_ZZZ_S:
7474 case AArch64::SABALB_ZZZ_H:
7475 case AArch64::SABALT_ZZZ_D:
7476 case AArch64::SABALT_ZZZ_S:
7477 case AArch64::SABALT_ZZZ_H:
7478 case AArch64::UABALv16i8_v8i16:
7479 case AArch64::UABALv2i32_v2i64:
7480 case AArch64::UABALv4i16_v4i32:
7481 case AArch64::UABALv4i32_v2i64:
7482 case AArch64::UABALv8i16_v4i32:
7483 case AArch64::UABALv8i8_v8i16:
7484 case AArch64::UABAv16i8:
7485 case AArch64::UABAv2i32:
7486 case AArch64::UABAv4i16:
7487 case AArch64::UABAv4i32:
7488 case AArch64::UABAv8i16:
7489 case AArch64::UABAv8i8:
7490 case AArch64::SABALv16i8_v8i16:
7491 case AArch64::SABALv2i32_v2i64:
7492 case AArch64::SABALv4i16_v4i32:
7493 case AArch64::SABALv4i32_v2i64:
7494 case AArch64::SABALv8i16_v4i32:
7495 case AArch64::SABALv8i8_v8i16:
7496 case AArch64::SABAv16i8:
7497 case AArch64::SABAv2i32:
7498 case AArch64::SABAv4i16:
7499 case AArch64::SABAv4i32:
7500 case AArch64::SABAv8i16:
7501 case AArch64::SABAv8i8:
7502 return true;
7503 }
7504
7505 return false;
7506}
7507
7508unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7509 unsigned AccumulationOpcode) const {
7510 switch (AccumulationOpcode) {
7511 default:
7512 llvm_unreachable("Unsupported accumulation Opcode!");
7513 case AArch64::UABALB_ZZZ_D:
7514 return AArch64::UABDLB_ZZZ_D;
7515 case AArch64::UABALB_ZZZ_H:
7516 return AArch64::UABDLB_ZZZ_H;
7517 case AArch64::UABALB_ZZZ_S:
7518 return AArch64::UABDLB_ZZZ_S;
7519 case AArch64::UABALT_ZZZ_D:
7520 return AArch64::UABDLT_ZZZ_D;
7521 case AArch64::UABALT_ZZZ_H:
7522 return AArch64::UABDLT_ZZZ_H;
7523 case AArch64::UABALT_ZZZ_S:
7524 return AArch64::UABDLT_ZZZ_S;
7525 case AArch64::UABALv16i8_v8i16:
7526 return AArch64::UABDLv16i8_v8i16;
7527 case AArch64::UABALv2i32_v2i64:
7528 return AArch64::UABDLv2i32_v2i64;
7529 case AArch64::UABALv4i16_v4i32:
7530 return AArch64::UABDLv4i16_v4i32;
7531 case AArch64::UABALv4i32_v2i64:
7532 return AArch64::UABDLv4i32_v2i64;
7533 case AArch64::UABALv8i16_v4i32:
7534 return AArch64::UABDLv8i16_v4i32;
7535 case AArch64::UABALv8i8_v8i16:
7536 return AArch64::UABDLv8i8_v8i16;
7537 case AArch64::UABAv16i8:
7538 return AArch64::UABDv16i8;
7539 case AArch64::UABAv2i32:
7540 return AArch64::UABDv2i32;
7541 case AArch64::UABAv4i16:
7542 return AArch64::UABDv4i16;
7543 case AArch64::UABAv4i32:
7544 return AArch64::UABDv4i32;
7545 case AArch64::UABAv8i16:
7546 return AArch64::UABDv8i16;
7547 case AArch64::UABAv8i8:
7548 return AArch64::UABDv8i8;
7549 case AArch64::SABALB_ZZZ_D:
7550 return AArch64::SABDLB_ZZZ_D;
7551 case AArch64::SABALB_ZZZ_S:
7552 return AArch64::SABDLB_ZZZ_S;
7553 case AArch64::SABALB_ZZZ_H:
7554 return AArch64::SABDLB_ZZZ_H;
7555 case AArch64::SABALT_ZZZ_D:
7556 return AArch64::SABDLT_ZZZ_D;
7557 case AArch64::SABALT_ZZZ_S:
7558 return AArch64::SABDLT_ZZZ_S;
7559 case AArch64::SABALT_ZZZ_H:
7560 return AArch64::SABDLT_ZZZ_H;
7561 case AArch64::SABALv16i8_v8i16:
7562 return AArch64::SABDLv16i8_v8i16;
7563 case AArch64::SABALv2i32_v2i64:
7564 return AArch64::SABDLv2i32_v2i64;
7565 case AArch64::SABALv4i16_v4i32:
7566 return AArch64::SABDLv4i16_v4i32;
7567 case AArch64::SABALv4i32_v2i64:
7568 return AArch64::SABDLv4i32_v2i64;
7569 case AArch64::SABALv8i16_v4i32:
7570 return AArch64::SABDLv8i16_v4i32;
7571 case AArch64::SABALv8i8_v8i16:
7572 return AArch64::SABDLv8i8_v8i16;
7573 case AArch64::SABAv16i8:
7574 return AArch64::SABDv16i8;
7575 case AArch64::SABAv2i32:
7576 return AArch64::SABDv2i32;
7577 case AArch64::SABAv4i16:
7578 return AArch64::SABDv4i16;
7579 case AArch64::SABAv4i32:
7580 return AArch64::SABDv4i32;
7581 case AArch64::SABAv8i16:
7582 return AArch64::SABDv8i16;
7583 case AArch64::SABAv8i8:
7584 return AArch64::SABDv8i8;
7585 }
7586}
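// Together with getReduceOpcodeForAccumulator later in this file, this
// mapping lets the generic accumulator-reassociation code break a serial
// chain such as
//   acc = UABALv8i8_v8i16 acc, a0, b0 ; acc = UABALv8i8_v8i16 acc, a1, b1 ; ...
// into independent sub-chains, seeding a new chain with the corresponding
// UABDLv8i8_v8i16 and summing the partial accumulators with an ADD at the end.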
7587
7588/// Floating-Point Support
7589
7590/// Find instructions that can be turned into madd.
7591 static bool getFMAPatterns(MachineInstr &Root,
7592 SmallVectorImpl<unsigned> &Patterns) {
7593
7594 if (!isCombineInstrCandidateFP(Root))
7595 return false;
7596
7597 MachineBasicBlock &MBB = *Root.getParent();
7598 bool Found = false;
7599
7600 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7601 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7602 Patterns.push_back(Pattern);
7603 return true;
7604 }
7605 return false;
7606 };
7607
7608 typedef AArch64MachineCombinerPattern MCP;
7609
7610 switch (Root.getOpcode()) {
7611 default:
7612 assert(false && "Unsupported FP instruction in combiner\n");
7613 break;
7614 case AArch64::FADDHrr:
7615 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7616 "FADDHrr does not have register operands");
7617
7618 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7619 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7620 break;
7621 case AArch64::FADDSrr:
7622 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7623 "FADDSrr does not have register operands");
7624
7625 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7626 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7627
7628 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7629 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7630 break;
7631 case AArch64::FADDDrr:
7632 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7633 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7634
7635 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7636 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7637 break;
7638 case AArch64::FADDv4f16:
7639 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7640 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7641
7642 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7643 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7644 break;
7645 case AArch64::FADDv8f16:
7646 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7647 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7648
7649 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7650 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7651 break;
7652 case AArch64::FADDv2f32:
7653 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7654 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7655
7656 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7657 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7658 break;
7659 case AArch64::FADDv2f64:
7660 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7661 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7662
7663 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7664 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7665 break;
7666 case AArch64::FADDv4f32:
7667 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7668 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7669
7670 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7671 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7672 break;
7673 case AArch64::FSUBHrr:
7674 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7675 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7676 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7677 break;
7678 case AArch64::FSUBSrr:
7679 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7680
7681 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7682 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7683
7684 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7685 break;
7686 case AArch64::FSUBDrr:
7687 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7688
7689 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7690 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7691
7692 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7693 break;
7694 case AArch64::FSUBv4f16:
7695 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7696 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7697
7698 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7699 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7700 break;
7701 case AArch64::FSUBv8f16:
7702 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7703 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7704
7705 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7706 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7707 break;
7708 case AArch64::FSUBv2f32:
7709 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7710 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7711
7712 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7713 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7714 break;
7715 case AArch64::FSUBv2f64:
7716 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7717 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7718
7719 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7720 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7721 break;
7722 case AArch64::FSUBv4f32:
7723 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7724 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7725
7726 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7727 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7728 break;
7729 }
7730 return Found;
7731}
7732
7733 static bool getFMULPatterns(MachineInstr &Root,
7734 SmallVectorImpl<unsigned> &Patterns) {
7735 MachineBasicBlock &MBB = *Root.getParent();
7736 bool Found = false;
7737
7738 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7739 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7740 MachineOperand &MO = Root.getOperand(Operand);
7741 MachineInstr *MI = nullptr;
7742 if (MO.isReg() && MO.getReg().isVirtual())
7743 MI = MRI.getUniqueVRegDef(MO.getReg());
7744 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7745 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7746 MI->getOperand(1).getReg().isVirtual())
7747 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7748 if (MI && MI->getOpcode() == Opcode) {
7749 Patterns.push_back(Pattern);
7750 return true;
7751 }
7752 return false;
7753 };
7754
7755 typedef AArch64MachineCombinerPattern MCP;
7756
7757 switch (Root.getOpcode()) {
7758 default:
7759 return false;
7760 case AArch64::FMULv2f32:
7761 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7762 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7763 break;
7764 case AArch64::FMULv2f64:
7765 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7766 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7767 break;
7768 case AArch64::FMULv4f16:
7769 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7770 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7771 break;
7772 case AArch64::FMULv4f32:
7773 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7774 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7775 break;
7776 case AArch64::FMULv8f16:
7777 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7778 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7779 break;
7780 }
7781
7782 return Found;
7783}
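// Example of the FMULv2i32_indexed_OP2 pattern: a multiply by a broadcast
// lane such as
//   %d:fpr64 = DUPv2i32lane %q:fpr128, 1
//   %r:fpr64 = FMULv2f32 %a, %d
// is later rewritten by genIndexedMultiply into the by-element form
//   %r:fpr64 = FMULv2i32_indexed %a, %q, 1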
7784
7785 static bool getFNEGPatterns(MachineInstr &Root,
7786 SmallVectorImpl<unsigned> &Patterns) {
7787 unsigned Opc = Root.getOpcode();
7788 MachineBasicBlock &MBB = *Root.getParent();
7789 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7790
7791 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7792 MachineOperand &MO = Root.getOperand(1);
7793 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7794 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7795 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7796 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
7797 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
7798 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
7799 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7800 Patterns.push_back(Pattern);
7801 return true;
7802 }
7803 return false;
7804 };
7805
7806 switch (Opc) {
7807 default:
7808 break;
7809 case AArch64::FNEGDr:
7810 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7811 case AArch64::FNEGSr:
7812 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7813 }
7814
7815 return false;
7816}
7817
7818/// Return true when a code sequence can improve throughput. It
7819/// should be called only for instructions in loops.
7820/// \param Pattern - combiner pattern
7822 switch (Pattern) {
7823 default:
7824 break;
7930 return true;
7931 } // end switch (Pattern)
7932 return false;
7933}
7934
7935/// Find other MI combine patterns.
7936 static bool getMiscPatterns(MachineInstr &Root,
7937 SmallVectorImpl<unsigned> &Patterns) {
7938 // A - (B + C) ==> (A - B) - C or (A - C) - B
7939 unsigned Opc = Root.getOpcode();
7940 MachineBasicBlock &MBB = *Root.getParent();
7941
7942 switch (Opc) {
7943 case AArch64::SUBWrr:
7944 case AArch64::SUBSWrr:
7945 case AArch64::SUBXrr:
7946 case AArch64::SUBSXrr:
7947 // Found candidate root.
7948 break;
7949 default:
7950 return false;
7951 }
7952
7953 if (isCombineInstrSettingFlag(Opc) &&
7954 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7955 -1)
7956 return false;
7957
7958 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7959 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7960 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7961 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7962 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7963 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7964 return true;
7965 }
7966
7967 return false;
7968}
7969
7970/// Check if the given instruction forms a gather load pattern that can be
7971/// optimized for better Memory-Level Parallelism (MLP). This function
7972/// identifies chains of NEON lane load instructions that load data from
7973/// different memory addresses into individual lanes of a 128-bit vector
7974/// register, then attempts to split the pattern into parallel loads to break
7975/// the serial dependency between instructions.
7976///
7977/// Pattern Matched:
7978/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7979/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7980///
7981/// Transformed Into:
7982/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7983/// to combine the results, enabling better memory-level parallelism.
7984///
7985/// Supported Element Types:
7986/// - 32-bit elements (LD1i32, 4 lanes total)
7987/// - 16-bit elements (LD1i16, 8 lanes total)
7988/// - 8-bit elements (LD1i8, 16 lanes total)
7989 static bool getGatherLanePattern(MachineInstr &Root,
7990 SmallVectorImpl<unsigned> &Patterns,
7991 unsigned LoadLaneOpCode, unsigned NumLanes) {
7992 const MachineFunction *MF = Root.getMF();
7993
7994 // Early exit if optimizing for size.
7995 if (MF->getFunction().hasMinSize())
7996 return false;
7997
7998 const MachineRegisterInfo &MRI = MF->getRegInfo();
7999 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
8000
8001 // The root of the pattern must load into the last lane of the vector.
8002 if (Root.getOperand(2).getImm() != NumLanes - 1)
8003 return false;
8004
8005 // Check that we have loads into all lanes except lane 0.
8006 // For each load we also want to check that:
8007 // 1. It has a single non-debug use (since we will be replacing the virtual
8008 // register)
8009 // 2. That the addressing mode only uses a single pointer operand
8010 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8011 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
8012 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
8013 SmallVector<const MachineInstr *, 16> LoadInstrs;
8014 while (!RemainingLanes.empty() && CurrInstr &&
8015 CurrInstr->getOpcode() == LoadLaneOpCode &&
8016 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
8017 CurrInstr->getNumOperands() == 4) {
8018 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
8019 LoadInstrs.push_back(CurrInstr);
8020 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8021 }
8022
8023 // Check that we have found a match for lanes N-1..1.
8024 if (!RemainingLanes.empty())
8025 return false;
8026
8027 // Match the SUBREG_TO_REG sequence.
8028 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
8029 return false;
8030
8031 // Verify that the subreg to reg loads an integer into the first lane.
8032 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
8033 unsigned SingleLaneSizeInBits = 128 / NumLanes;
8034 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
8035 return false;
8036
8037 // Verify that it also has a single non-debug use.
8038 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
8039 return false;
8040
8041 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
8042
8043 // If there is any chance of aliasing, do not apply the pattern.
8044 // Walk backward through the MBB starting from Root.
8045 // Exit early if we've encountered all load instructions or hit the search
8046 // limit.
8047 auto MBBItr = Root.getIterator();
8048 unsigned RemainingSteps = GatherOptSearchLimit;
8049 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
8050 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
8051 const MachineBasicBlock *MBB = Root.getParent();
8052
8053 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
8054 !RemainingLoadInstrs.empty();
8055 --MBBItr, --RemainingSteps) {
8056 const MachineInstr &CurrInstr = *MBBItr;
8057
8058 // Remove this instruction from remaining loads if it's one we're tracking.
8059 RemainingLoadInstrs.erase(&CurrInstr);
8060
8061 // Check for potential aliasing with any of the load instructions to
8062 // optimize.
8063 if (CurrInstr.isLoadFoldBarrier())
8064 return false;
8065 }
8066
8067 // If we hit the search limit without finding all load instructions,
8068 // don't match the pattern.
8069 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
8070 return false;
8071
8072 switch (NumLanes) {
8073 case 4:
8074 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
8075 break;
8076 case 8:
8077 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
8078 break;
8079 case 16:
8080 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
8081 break;
8082 default:
8083 llvm_unreachable("Got bad number of lanes for gather pattern.");
8084 }
8085
8086 return true;
8087}
8088
8089/// Search for patterns of LD instructions we can optimize.
8090 static bool getLoadPatterns(MachineInstr &Root,
8091 SmallVectorImpl<unsigned> &Patterns) {
8092
8093 // The pattern searches for loads into single lanes.
8094 switch (Root.getOpcode()) {
8095 case AArch64::LD1i32:
8096 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
8097 case AArch64::LD1i16:
8098 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
8099 case AArch64::LD1i8:
8100 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
8101 default:
8102 return false;
8103 }
8104}
8105
8106/// Generate optimized instruction sequence for gather load patterns to improve
8107/// Memory-Level Parallelism (MLP). This function transforms a chain of
8108/// sequential NEON lane loads into parallel vector loads that can execute
8109/// concurrently.
8110static void
8111 generateGatherLanePattern(MachineInstr &Root,
8112 SmallVectorImpl<MachineInstr *> &InsInstrs,
8113 SmallVectorImpl<MachineInstr *> &DelInstrs,
8114 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8115 unsigned Pattern, unsigned NumLanes) {
8116 MachineFunction &MF = *Root.getParent()->getParent();
8117 MachineRegisterInfo &MRI = MF.getRegInfo();
8118 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8119
8120 // Gather the initial load instructions to build the pattern.
8121 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
8122 MachineInstr *CurrInstr = &Root;
8123 for (unsigned i = 0; i < NumLanes - 1; ++i) {
8124 LoadToLaneInstrs.push_back(CurrInstr);
8125 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
8126 }
8127
8128 // Sort the load instructions according to the lane.
8129 llvm::sort(LoadToLaneInstrs,
8130 [](const MachineInstr *A, const MachineInstr *B) {
8131 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
8132 });
8133
8134 MachineInstr *SubregToReg = CurrInstr;
8135 LoadToLaneInstrs.push_back(
8136 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
8137 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
8138
8139 const TargetRegisterClass *FPR128RegClass =
8140 MRI.getRegClass(Root.getOperand(0).getReg());
8141
8142 // Helper lambda to create a LD1 instruction.
8143 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
8144 Register SrcRegister, unsigned Lane,
8145 Register OffsetRegister,
8146 bool OffsetRegisterKillState) {
8147 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8148 MachineInstrBuilder LoadIndexIntoRegister =
8149 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8150 NewRegister)
8151 .addReg(SrcRegister)
8152 .addImm(Lane)
8153 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
8154 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8155 InsInstrs.push_back(LoadIndexIntoRegister);
8156 return NewRegister;
8157 };
8158
8159 // Helper to create load instruction based on the NumLanes in the NEON
8160 // register we are rewriting.
8161 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
8162 Register OffsetReg,
8163 bool KillState) -> MachineInstrBuilder {
8164 unsigned Opcode;
8165 switch (NumLanes) {
8166 case 4:
8167 Opcode = AArch64::LDRSui;
8168 break;
8169 case 8:
8170 Opcode = AArch64::LDRHui;
8171 break;
8172 case 16:
8173 Opcode = AArch64::LDRBui;
8174 break;
8175 default:
8177 "Got unsupported number of lanes in machine-combiner gather pattern");
8178 }
8179 // Immediate offset load
8180 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8181 .addReg(OffsetReg)
8182 .addImm(0);
8183 };
8184
8185 // Load the remaining lanes into register 0.
8186 auto LanesToLoadToReg0 =
8187 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8188 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8189 Register PrevReg = SubregToReg->getOperand(0).getReg();
8190 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8191 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8192 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8193 OffsetRegOperand.getReg(),
8194 OffsetRegOperand.isKill());
8195 DelInstrs.push_back(LoadInstr);
8196 }
8197 Register LastLoadReg0 = PrevReg;
8198
8199 // First load into register 1. Perform an integer load to zero out the upper
8200 // lanes in a single instruction.
8201 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
8202 MachineInstr *OriginalSplitLoad =
8203 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8204 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8205 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8206
8207 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8208 OriginalSplitLoad->getOperand(3);
8209 MachineInstrBuilder MiddleIndexLoadInstr =
8210 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8211 OriginalSplitToLoadOffsetOperand.getReg(),
8212 OriginalSplitToLoadOffsetOperand.isKill());
8213
8214 InstrIdxForVirtReg.insert(
8215 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8216 InsInstrs.push_back(MiddleIndexLoadInstr);
8217 DelInstrs.push_back(OriginalSplitLoad);
8218
8219 // Subreg To Reg instruction for register 1.
8220 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8221 unsigned SubregType;
8222 switch (NumLanes) {
8223 case 4:
8224 SubregType = AArch64::ssub;
8225 break;
8226 case 8:
8227 SubregType = AArch64::hsub;
8228 break;
8229 case 16:
8230 SubregType = AArch64::bsub;
8231 break;
8232 default:
8234 "Got invalid NumLanes for machine-combiner gather pattern");
8235 }
8236
8237 auto SubRegToRegInstr =
8238 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8239 DestRegForSubregToReg)
8240 .addImm(0)
8241 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8242 .addImm(SubregType);
8243 InstrIdxForVirtReg.insert(
8244 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8245 InsInstrs.push_back(SubRegToRegInstr);
8246
8247 // Load remaining lanes into register 1.
8248 auto LanesToLoadToReg1 =
8249 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8250 LoadToLaneInstrsAscending.end());
8251 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8252 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8253 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8254 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8255 OffsetRegOperand.getReg(),
8256 OffsetRegOperand.isKill());
8257
8258 // Do not add the last reg to DelInstrs - it will be removed later.
8259 if (Index == NumLanes / 2 - 2) {
8260 break;
8261 }
8262 DelInstrs.push_back(LoadInstr);
8263 }
8264 Register LastLoadReg1 = PrevReg;
8265
8266 // Create the final zip instruction to combine the results.
8267 MachineInstrBuilder ZipInstr =
8268 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8269 Root.getOperand(0).getReg())
8270 .addReg(LastLoadReg0)
8271 .addReg(LastLoadReg1);
8272 InsInstrs.push_back(ZipInstr);
8273}
8274
8288
8289 /// Return true when there is potentially a faster code sequence for an
8290 /// instruction chain ending in \p Root. All potential patterns are listed in
8291 /// the \p Pattern vector. Pattern should be sorted in priority order since the
8292 /// pattern evaluator stops checking as soon as it finds a faster sequence.
8293
8294 bool AArch64InstrInfo::getMachineCombinerPatterns(
8295 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8296 bool DoRegPressureReduce) const {
8297 // Integer patterns
8298 if (getMaddPatterns(Root, Patterns))
8299 return true;
8300 // Floating point patterns
8301 if (getFMULPatterns(Root, Patterns))
8302 return true;
8303 if (getFMAPatterns(Root, Patterns))
8304 return true;
8305 if (getFNEGPatterns(Root, Patterns))
8306 return true;
8307
8308 // Other patterns
8309 if (getMiscPatterns(Root, Patterns))
8310 return true;
8311
8312 // Load patterns
8313 if (getLoadPatterns(Root, Patterns))
8314 return true;
8315
8316 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8317 DoRegPressureReduce);
8318}
8319
8320 enum class FMAInstKind { Default, Indexed, Accumulator };
8321 /// genFusedMultiply - Generate fused multiply instructions.
8322 /// This function supports both integer and floating point instructions.
8323 /// A typical example:
8324 ///  F|MUL I=A,B,0
8325 ///  F|ADD R,I,C
8326 ///  ==> F|MADD R,A,B,C
8327 /// \param MF Containing MachineFunction
8328 /// \param MRI Register information
8329 /// \param TII Target information
8330 /// \param Root is the F|ADD instruction
8331 /// \param [out] InsInstrs is a vector of machine instructions and will
8332 /// contain the generated madd instruction
8333 /// \param IdxMulOpd is index of operand in Root that is the result of
8334 /// the F|MUL. In the example above IdxMulOpd is 1.
8335 /// \param MaddOpc the opcode of the f|madd instruction
8336 /// \param RC Register class of operands
8337 /// \param kind The kind of fma instruction (addressing mode) to be generated
8338 /// \param ReplacedAddend is the result register from the instruction
8339 /// replacing the non-combined operand, if any.
8340 static MachineInstr *
8341 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
8342 const TargetInstrInfo *TII, MachineInstr &Root,
8343 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8344 unsigned MaddOpc, const TargetRegisterClass *RC,
8345 FMAInstKind kind = FMAInstKind::Default,
8346 const Register *ReplacedAddend = nullptr) {
8347 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8348
8349 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8350 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8351 Register ResultReg = Root.getOperand(0).getReg();
8352 Register SrcReg0 = MUL->getOperand(1).getReg();
8353 bool Src0IsKill = MUL->getOperand(1).isKill();
8354 Register SrcReg1 = MUL->getOperand(2).getReg();
8355 bool Src1IsKill = MUL->getOperand(2).isKill();
8356
8357 Register SrcReg2;
8358 bool Src2IsKill;
8359 if (ReplacedAddend) {
8360 // If we just generated a new addend, we must be its only use.
8361 SrcReg2 = *ReplacedAddend;
8362 Src2IsKill = true;
8363 } else {
8364 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8365 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8366 }
8367
8368 if (ResultReg.isVirtual())
8369 MRI.constrainRegClass(ResultReg, RC);
8370 if (SrcReg0.isVirtual())
8371 MRI.constrainRegClass(SrcReg0, RC);
8372 if (SrcReg1.isVirtual())
8373 MRI.constrainRegClass(SrcReg1, RC);
8374 if (SrcReg2.isVirtual())
8375 MRI.constrainRegClass(SrcReg2, RC);
8376
8377 MachineInstrBuilder MIB;
8378 if (kind == FMAInstKind::Default)
8379 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8380 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8381 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8382 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8383 else if (kind == FMAInstKind::Indexed)
8384 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8385 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8386 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8387 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8388 .addImm(MUL->getOperand(3).getImm());
8389 else if (kind == FMAInstKind::Accumulator)
8390 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8391 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8392 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8393 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8394 else
8395 assert(false && "Invalid FMA instruction kind \n");
8396 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
8397 InsInstrs.push_back(MIB);
8398 return MUL;
8399}
8400
8401 static MachineInstr *
8402 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
8403 const TargetInstrInfo *TII, MachineInstr &Root,
8404 SmallVectorImpl<MachineInstr *> &InsInstrs) {
8405 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8406
8407 unsigned Opc = 0;
8408 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8409 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8410 Opc = AArch64::FNMADDSrrr;
8411 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8412 Opc = AArch64::FNMADDDrrr;
8413 else
8414 return nullptr;
8415
8416 Register ResultReg = Root.getOperand(0).getReg();
8417 Register SrcReg0 = MAD->getOperand(1).getReg();
8418 Register SrcReg1 = MAD->getOperand(2).getReg();
8419 Register SrcReg2 = MAD->getOperand(3).getReg();
8420 bool Src0IsKill = MAD->getOperand(1).isKill();
8421 bool Src1IsKill = MAD->getOperand(2).isKill();
8422 bool Src2IsKill = MAD->getOperand(3).isKill();
8423 if (ResultReg.isVirtual())
8424 MRI.constrainRegClass(ResultReg, RC);
8425 if (SrcReg0.isVirtual())
8426 MRI.constrainRegClass(SrcReg0, RC);
8427 if (SrcReg1.isVirtual())
8428 MRI.constrainRegClass(SrcReg1, RC);
8429 if (SrcReg2.isVirtual())
8430 MRI.constrainRegClass(SrcReg2, RC);
8431
8432 MachineInstrBuilder MIB =
8433 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8434 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8435 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8436 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8437 InsInstrs.push_back(MIB);
8438
8439 return MAD;
8440}
8441
8442 /// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8443 static MachineInstr *
8444 genIndexedMultiply(MachineInstr &Root,
8445 SmallVectorImpl<MachineInstr *> &InsInstrs,
8446 unsigned IdxDupOp, unsigned MulOpc,
8447 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8448 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8449 "Invalid index of FMUL operand");
8450
8451 MachineFunction &MF = *Root.getMF();
8452 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8453
8454 MachineInstr *Dup =
8455 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8456
8457 if (Dup->getOpcode() == TargetOpcode::COPY)
8458 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8459
8460 Register DupSrcReg = Dup->getOperand(1).getReg();
8461 MRI.clearKillFlags(DupSrcReg);
8462 MRI.constrainRegClass(DupSrcReg, RC);
8463
8464 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8465
8466 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8467 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8468
8469 Register ResultReg = Root.getOperand(0).getReg();
8470
8471 MachineInstrBuilder MIB;
8472 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8473 .add(MulOp)
8474 .addReg(DupSrcReg)
8475 .addImm(DupSrcLane);
8476
8477 InsInstrs.push_back(MIB);
8478 return &Root;
8479}
8480
8481 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8482 /// instructions.
8483 ///
8484 /// \see genFusedMultiply
8485 static MachineInstr *genFusedMultiplyAcc(
8486 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8487 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8488 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8489 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8490 FMAInstKind::Accumulator);
8491 }
8492
8493 /// genNeg - Helper to generate an intermediate negation of the second operand
8494 /// of Root
8495 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8496 const TargetInstrInfo *TII, MachineInstr &Root,
8498 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8499 unsigned MnegOpc, const TargetRegisterClass *RC) {
8500 Register NewVR = MRI.createVirtualRegister(RC);
8502 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8503 .add(Root.getOperand(2));
8504 InsInstrs.push_back(MIB);
8505
8506 assert(InstrIdxForVirtReg.empty());
8507 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8508
8509 return NewVR;
8510}
8511
8512 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8513 /// instructions with an additional negation of the accumulator
8514 static MachineInstr *genFusedMultiplyAccNeg(
8515 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8516 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8517 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8518 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8519 assert(IdxMulOpd == 1);
8520
8521 Register NewVR =
8522 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8523 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8524 FMAInstKind::Accumulator, &NewVR);
8525}
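// Illustration for a pattern like MULSUBv8i8_OP1: given
//   %m = MULv8i8 %a, %b
//   %r = SUBv8i8 %m, %c
// the helper first emits %n = NEGv8i8 %c via genNeg and then an accumulator
// form %r = MLAv8i8 %n, %a, %b, i.e. (-%c) + %a * %b, which is equivalent.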
8526
8527 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8528 /// instructions.
8529 ///
8530 /// \see genFusedMultiply
8531 static MachineInstr *genFusedMultiplyIdx(
8532 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8533 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8534 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8535 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8536 FMAInstKind::Indexed);
8537 }
8538
8539 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
8540 /// instructions with an additional negation of the accumulator
8541 static MachineInstr *genFusedMultiplyIdxNeg(
8542 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8543 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8544 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8545 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8546 assert(IdxMulOpd == 1);
8547
8548 Register NewVR =
8549 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8550
8551 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8552 FMAInstKind::Indexed, &NewVR);
8553}
8554
8555 /// genMaddR - Generate madd instruction and combine mul and add using
8556 /// an extra virtual register
8557 /// Example - an ADD intermediate needs to be stored in a register:
8558 ///   MUL I=A,B,0
8559 ///   ADD R,I,Imm
8560 ///   ==> ORR  V, ZR, Imm
8561 ///   ==> MADD R,A,B,V
8562 /// \param MF Containing MachineFunction
8563 /// \param MRI Register information
8564 /// \param TII Target information
8565 /// \param Root is the ADD instruction
8566 /// \param [out] InsInstrs is a vector of machine instructions and will
8567 /// contain the generated madd instruction
8568 /// \param IdxMulOpd is index of operand in Root that is the result of
8569 /// the MUL. In the example above IdxMulOpd is 1.
8570 /// \param MaddOpc the opcode of the madd instruction
8571 /// \param VR is a virtual register that holds the value of an ADD operand
8572 /// (V in the example above).
8573 /// \param RC Register class of operands
8574 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8575 const TargetInstrInfo *TII, MachineInstr &Root,
8576 SmallVectorImpl<MachineInstr *> &InsInstrs,
8577 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8578 const TargetRegisterClass *RC) {
8579 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8580
8581 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8582 Register ResultReg = Root.getOperand(0).getReg();
8583 Register SrcReg0 = MUL->getOperand(1).getReg();
8584 bool Src0IsKill = MUL->getOperand(1).isKill();
8585 Register SrcReg1 = MUL->getOperand(2).getReg();
8586 bool Src1IsKill = MUL->getOperand(2).isKill();
8587
8588 if (ResultReg.isVirtual())
8589 MRI.constrainRegClass(ResultReg, RC);
8590 if (SrcReg0.isVirtual())
8591 MRI.constrainRegClass(SrcReg0, RC);
8592 if (SrcReg1.isVirtual())
8593 MRI.constrainRegClass(SrcReg1, RC);
8594 if (Register(VR).isVirtual())
8595 MRI.constrainRegClass(VR, RC);
8596
8597 MachineInstrBuilder MIB =
8598 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8599 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8600 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8601 .addReg(VR);
8602 // Insert the MADD
8603 InsInstrs.push_back(MIB);
8604 return MUL;
8605}
8606
8607 /// Do the following transformation
8608 /// A - (B + C)  ==>  (A - B) - C
8609 /// A - (B + C)  ==>  (A - C) - B
8610 static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8611 const TargetInstrInfo *TII, MachineInstr &Root,
8612 SmallVectorImpl<MachineInstr *> &InsInstrs,
8613 SmallVectorImpl<MachineInstr *> &DelInstrs,
8614 unsigned IdxOpd1,
8615 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8616 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8617 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8618 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8619
8620 Register ResultReg = Root.getOperand(0).getReg();
8621 Register RegA = Root.getOperand(1).getReg();
8622 bool RegAIsKill = Root.getOperand(1).isKill();
8623 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8624 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8625 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8626 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8627 Register NewVR =
8628 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8629
8630 unsigned Opcode = Root.getOpcode();
8631 if (Opcode == AArch64::SUBSWrr)
8632 Opcode = AArch64::SUBWrr;
8633 else if (Opcode == AArch64::SUBSXrr)
8634 Opcode = AArch64::SUBXrr;
8635 else
8636 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8637 "Unexpected instruction opcode.");
8638
8639 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8640 Flags &= ~MachineInstr::NoSWrap;
8641 Flags &= ~MachineInstr::NoUWrap;
8642
8643 MachineInstrBuilder MIB1 =
8644 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8645 .addReg(RegA, getKillRegState(RegAIsKill))
8646 .addReg(RegB, getKillRegState(RegBIsKill))
8647 .setMIFlags(Flags);
8648 MachineInstrBuilder MIB2 =
8649 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8650 .addReg(NewVR, getKillRegState(true))
8651 .addReg(RegC, getKillRegState(RegCIsKill))
8652 .setMIFlags(Flags);
8653
8654 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8655 InsInstrs.push_back(MIB1);
8656 InsInstrs.push_back(MIB2);
8657 DelInstrs.push_back(AddMI);
8658 DelInstrs.push_back(&Root);
8659}
8660
8661unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8662 unsigned int AccumulatorOpCode) const {
8663 switch (AccumulatorOpCode) {
8664 case AArch64::UABALB_ZZZ_D:
8665 case AArch64::SABALB_ZZZ_D:
8666 case AArch64::UABALT_ZZZ_D:
8667 case AArch64::SABALT_ZZZ_D:
8668 return AArch64::ADD_ZZZ_D;
8669 case AArch64::UABALB_ZZZ_H:
8670 case AArch64::SABALB_ZZZ_H:
8671 case AArch64::UABALT_ZZZ_H:
8672 case AArch64::SABALT_ZZZ_H:
8673 return AArch64::ADD_ZZZ_H;
8674 case AArch64::UABALB_ZZZ_S:
8675 case AArch64::SABALB_ZZZ_S:
8676 case AArch64::UABALT_ZZZ_S:
8677 case AArch64::SABALT_ZZZ_S:
8678 return AArch64::ADD_ZZZ_S;
8679 case AArch64::UABALv16i8_v8i16:
8680 case AArch64::SABALv8i8_v8i16:
8681 case AArch64::SABAv8i16:
8682 case AArch64::UABAv8i16:
8683 return AArch64::ADDv8i16;
8684 case AArch64::SABALv2i32_v2i64:
8685 case AArch64::UABALv2i32_v2i64:
8686 case AArch64::SABALv4i32_v2i64:
8687 return AArch64::ADDv2i64;
8688 case AArch64::UABALv4i16_v4i32:
8689 case AArch64::SABALv4i16_v4i32:
8690 case AArch64::SABALv8i16_v4i32:
8691 case AArch64::SABAv4i32:
8692 case AArch64::UABAv4i32:
8693 return AArch64::ADDv4i32;
8694 case AArch64::UABALv4i32_v2i64:
8695 return AArch64::ADDv2i64;
8696 case AArch64::UABALv8i16_v4i32:
8697 return AArch64::ADDv4i32;
8698 case AArch64::UABALv8i8_v8i16:
8699 case AArch64::SABALv16i8_v8i16:
8700 return AArch64::ADDv8i16;
8701 case AArch64::UABAv16i8:
8702 case AArch64::SABAv16i8:
8703 return AArch64::ADDv16i8;
8704 case AArch64::UABAv4i16:
8705 case AArch64::SABAv4i16:
8706 return AArch64::ADDv4i16;
8707 case AArch64::UABAv2i32:
8708 case AArch64::SABAv2i32:
8709 return AArch64::ADDv2i32;
8710 case AArch64::UABAv8i8:
8711 case AArch64::SABAv8i8:
8712 return AArch64::ADDv8i8;
8713 default:
8714 llvm_unreachable("Unknown accumulator opcode");
8715 }
8716}
8717
8718 /// When getMachineCombinerPatterns() finds potential patterns,
8719 /// this function generates the instructions that could replace the
8720 /// original code sequence
8721void AArch64InstrInfo::genAlternativeCodeSequence(
8722 MachineInstr &Root, unsigned Pattern,
8723 SmallVectorImpl<MachineInstr *> &InsInstrs,
8724 SmallVectorImpl<MachineInstr *> &DelInstrs,
8725 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8726 MachineBasicBlock &MBB = *Root.getParent();
8727 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8728 MachineFunction &MF = *MBB.getParent();
8729 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8730
8731 MachineInstr *MUL = nullptr;
8732 const TargetRegisterClass *RC;
8733 unsigned Opc;
8734 switch (Pattern) {
8735 default:
8736 // Reassociate instructions.
8737 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8738 DelInstrs, InstrIdxForVirtReg);
8739 return;
8740 case AArch64MachineCombinerPattern::SUBADD_OP1:
8741 // A - (B + C)
8742 // ==> (A - B) - C
8743 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8744 InstrIdxForVirtReg);
8745 return;
8746 case AArch64MachineCombinerPattern::SUBADD_OP2:
8747 // A - (B + C)
8748 // ==> (A - C) - B
8749 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8750 InstrIdxForVirtReg);
8751 return;
8754 // MUL I=A,B,0
8755 // ADD R,I,C
8756 // ==> MADD R,A,B,C
8757 // --- Create(MADD);
8759 Opc = AArch64::MADDWrrr;
8760 RC = &AArch64::GPR32RegClass;
8761 } else {
8762 Opc = AArch64::MADDXrrr;
8763 RC = &AArch64::GPR64RegClass;
8764 }
8765 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8766 break;
8769 // MUL I=A,B,0
8770 // ADD R,C,I
8771 // ==> MADD R,A,B,C
8772 // --- Create(MADD);
8774 Opc = AArch64::MADDWrrr;
8775 RC = &AArch64::GPR32RegClass;
8776 } else {
8777 Opc = AArch64::MADDXrrr;
8778 RC = &AArch64::GPR64RegClass;
8779 }
8780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8781 break;
8786 // MUL I=A,B,0
8787 // ADD/SUB R,I,Imm
8788 // ==> MOV V, Imm/-Imm
8789 // ==> MADD R,A,B,V
8790 // --- Create(MADD);
8791 const TargetRegisterClass *RC;
8792 unsigned BitSize, MovImm;
8795 MovImm = AArch64::MOVi32imm;
8796 RC = &AArch64::GPR32spRegClass;
8797 BitSize = 32;
8798 Opc = AArch64::MADDWrrr;
8799 RC = &AArch64::GPR32RegClass;
8800 } else {
8801 MovImm = AArch64::MOVi64imm;
8802 RC = &AArch64::GPR64spRegClass;
8803 BitSize = 64;
8804 Opc = AArch64::MADDXrrr;
8805 RC = &AArch64::GPR64RegClass;
8806 }
8807 Register NewVR = MRI.createVirtualRegister(RC);
8808 uint64_t Imm = Root.getOperand(2).getImm();
8809
8810 if (Root.getOperand(3).isImm()) {
8811 unsigned Val = Root.getOperand(3).getImm();
8812 Imm = Imm << Val;
8813 }
8814 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8816 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8817 // Check that the immediate can be composed via a single instruction.
8819 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8820 if (Insn.size() != 1)
8821 return;
8822 MachineInstrBuilder MIB1 =
8823 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8824 .addImm(IsSub ? -Imm : Imm);
8825 InsInstrs.push_back(MIB1);
8826 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8827 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8828 break;
8829 }
8832 // MUL I=A,B,0
8833 // SUB R,I, C
8834 // ==> SUB V, 0, C
8835 // ==> MADD R,A,B,V // = -C + A*B
8836 // --- Create(MADD);
8837 const TargetRegisterClass *SubRC;
8838 unsigned SubOpc, ZeroReg;
8840 SubOpc = AArch64::SUBWrr;
8841 SubRC = &AArch64::GPR32spRegClass;
8842 ZeroReg = AArch64::WZR;
8843 Opc = AArch64::MADDWrrr;
8844 RC = &AArch64::GPR32RegClass;
8845 } else {
8846 SubOpc = AArch64::SUBXrr;
8847 SubRC = &AArch64::GPR64spRegClass;
8848 ZeroReg = AArch64::XZR;
8849 Opc = AArch64::MADDXrrr;
8850 RC = &AArch64::GPR64RegClass;
8851 }
8852 Register NewVR = MRI.createVirtualRegister(SubRC);
8853 // SUB NewVR, 0, C
8854 MachineInstrBuilder MIB1 =
8855 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8856 .addReg(ZeroReg)
8857 .add(Root.getOperand(2));
8858 InsInstrs.push_back(MIB1);
8859 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8860 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8861 break;
8862 }
8865 // MUL I=A,B,0
8866 // SUB R,C,I
8867 // ==> MSUB R,A,B,C (computes C - A*B)
8868 // --- Create(MSUB);
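 // For example (illustrative):
 //   mul  w8, w0, w1
 //   sub  w9, w2, w8
 // ==>
 //   msub w9, w0, w1, w2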
8870 Opc = AArch64::MSUBWrrr;
8871 RC = &AArch64::GPR32RegClass;
8872 } else {
8873 Opc = AArch64::MSUBXrrr;
8874 RC = &AArch64::GPR64RegClass;
8875 }
8876 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8877 break;
8879 Opc = AArch64::MLAv8i8;
8880 RC = &AArch64::FPR64RegClass;
8881 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8882 break;
8884 Opc = AArch64::MLAv8i8;
8885 RC = &AArch64::FPR64RegClass;
8886 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8887 break;
8889 Opc = AArch64::MLAv16i8;
8890 RC = &AArch64::FPR128RegClass;
8891 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8892 break;
8894 Opc = AArch64::MLAv16i8;
8895 RC = &AArch64::FPR128RegClass;
8896 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8897 break;
8899 Opc = AArch64::MLAv4i16;
8900 RC = &AArch64::FPR64RegClass;
8901 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8902 break;
8904 Opc = AArch64::MLAv4i16;
8905 RC = &AArch64::FPR64RegClass;
8906 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8907 break;
8909 Opc = AArch64::MLAv8i16;
8910 RC = &AArch64::FPR128RegClass;
8911 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8912 break;
8914 Opc = AArch64::MLAv8i16;
8915 RC = &AArch64::FPR128RegClass;
8916 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8917 break;
8919 Opc = AArch64::MLAv2i32;
8920 RC = &AArch64::FPR64RegClass;
8921 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8922 break;
8924 Opc = AArch64::MLAv2i32;
8925 RC = &AArch64::FPR64RegClass;
8926 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8927 break;
8929 Opc = AArch64::MLAv4i32;
8930 RC = &AArch64::FPR128RegClass;
8931 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8932 break;
8934 Opc = AArch64::MLAv4i32;
8935 RC = &AArch64::FPR128RegClass;
8936 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8937 break;
8938
8940 Opc = AArch64::MLAv8i8;
8941 RC = &AArch64::FPR64RegClass;
8942 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8943 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8944 RC);
8945 break;
8947 Opc = AArch64::MLSv8i8;
8948 RC = &AArch64::FPR64RegClass;
8949 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8950 break;
8952 Opc = AArch64::MLAv16i8;
8953 RC = &AArch64::FPR128RegClass;
8954 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8955 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8956 RC);
8957 break;
8959 Opc = AArch64::MLSv16i8;
8960 RC = &AArch64::FPR128RegClass;
8961 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8962 break;
8964 Opc = AArch64::MLAv4i16;
8965 RC = &AArch64::FPR64RegClass;
8966 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8967 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8968 RC);
8969 break;
8971 Opc = AArch64::MLSv4i16;
8972 RC = &AArch64::FPR64RegClass;
8973 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8974 break;
8976 Opc = AArch64::MLAv8i16;
8977 RC = &AArch64::FPR128RegClass;
8978 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8979 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8980 RC);
8981 break;
8983 Opc = AArch64::MLSv8i16;
8984 RC = &AArch64::FPR128RegClass;
8985 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8986 break;
8988 Opc = AArch64::MLAv2i32;
8989 RC = &AArch64::FPR64RegClass;
8990 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8991 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8992 RC);
8993 break;
8995 Opc = AArch64::MLSv2i32;
8996 RC = &AArch64::FPR64RegClass;
8997 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8998 break;
9000 Opc = AArch64::MLAv4i32;
9001 RC = &AArch64::FPR128RegClass;
9002 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
9003 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9004 RC);
9005 break;
9007 Opc = AArch64::MLSv4i32;
9008 RC = &AArch64::FPR128RegClass;
9009 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9010 break;
9011
9013 Opc = AArch64::MLAv4i16_indexed;
9014 RC = &AArch64::FPR64RegClass;
9015 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9016 break;
9018 Opc = AArch64::MLAv4i16_indexed;
9019 RC = &AArch64::FPR64RegClass;
9020 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9021 break;
9023 Opc = AArch64::MLAv8i16_indexed;
9024 RC = &AArch64::FPR128RegClass;
9025 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9026 break;
9028 Opc = AArch64::MLAv8i16_indexed;
9029 RC = &AArch64::FPR128RegClass;
9030 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9031 break;
9033 Opc = AArch64::MLAv2i32_indexed;
9034 RC = &AArch64::FPR64RegClass;
9035 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9036 break;
9038 Opc = AArch64::MLAv2i32_indexed;
9039 RC = &AArch64::FPR64RegClass;
9040 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9041 break;
9043 Opc = AArch64::MLAv4i32_indexed;
9044 RC = &AArch64::FPR128RegClass;
9045 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9046 break;
9048 Opc = AArch64::MLAv4i32_indexed;
9049 RC = &AArch64::FPR128RegClass;
9050 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9051 break;
9052
9054 Opc = AArch64::MLAv4i16_indexed;
9055 RC = &AArch64::FPR64RegClass;
9056 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9057 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
9058 RC);
9059 break;
9061 Opc = AArch64::MLSv4i16_indexed;
9062 RC = &AArch64::FPR64RegClass;
9063 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9064 break;
9066 Opc = AArch64::MLAv8i16_indexed;
9067 RC = &AArch64::FPR128RegClass;
9068 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9069 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
9070 RC);
9071 break;
9073 Opc = AArch64::MLSv8i16_indexed;
9074 RC = &AArch64::FPR128RegClass;
9075 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9076 break;
9078 Opc = AArch64::MLAv2i32_indexed;
9079 RC = &AArch64::FPR64RegClass;
9080 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9081 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
9082 RC);
9083 break;
9085 Opc = AArch64::MLSv2i32_indexed;
9086 RC = &AArch64::FPR64RegClass;
9087 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9088 break;
9090 Opc = AArch64::MLAv4i32_indexed;
9091 RC = &AArch64::FPR128RegClass;
9092 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
9093 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
9094 RC);
9095 break;
9097 Opc = AArch64::MLSv4i32_indexed;
9098 RC = &AArch64::FPR128RegClass;
9099 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9100 break;
9101
9102 // Floating Point Support
9104 Opc = AArch64::FMADDHrrr;
9105 RC = &AArch64::FPR16RegClass;
9106 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9107 break;
9109 Opc = AArch64::FMADDSrrr;
9110 RC = &AArch64::FPR32RegClass;
9111 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9112 break;
9114 Opc = AArch64::FMADDDrrr;
9115 RC = &AArch64::FPR64RegClass;
9116 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9117 break;
9118
9120 Opc = AArch64::FMADDHrrr;
9121 RC = &AArch64::FPR16RegClass;
9122 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9123 break;
9125 Opc = AArch64::FMADDSrrr;
9126 RC = &AArch64::FPR32RegClass;
9127 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9128 break;
9130 Opc = AArch64::FMADDDrrr;
9131 RC = &AArch64::FPR64RegClass;
9132 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9133 break;
9134
9136 Opc = AArch64::FMLAv1i32_indexed;
9137 RC = &AArch64::FPR32RegClass;
9138 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9140 break;
9142 Opc = AArch64::FMLAv1i32_indexed;
9143 RC = &AArch64::FPR32RegClass;
9144 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9146 break;
9147
9149 Opc = AArch64::FMLAv1i64_indexed;
9150 RC = &AArch64::FPR64RegClass;
9151 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9153 break;
9155 Opc = AArch64::FMLAv1i64_indexed;
9156 RC = &AArch64::FPR64RegClass;
9157 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9159 break;
9160
9162 RC = &AArch64::FPR64RegClass;
9163 Opc = AArch64::FMLAv4i16_indexed;
9164 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9166 break;
9168 RC = &AArch64::FPR64RegClass;
9169 Opc = AArch64::FMLAv4f16;
9170 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9172 break;
9174 RC = &AArch64::FPR64RegClass;
9175 Opc = AArch64::FMLAv4i16_indexed;
9176 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9178 break;
9180 RC = &AArch64::FPR64RegClass;
9181 Opc = AArch64::FMLAv4f16;
9182 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9184 break;
9185
9188 RC = &AArch64::FPR64RegClass;
9190 Opc = AArch64::FMLAv2i32_indexed;
9191 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9193 } else {
9194 Opc = AArch64::FMLAv2f32;
9195 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9197 }
9198 break;
9201 RC = &AArch64::FPR64RegClass;
9203 Opc = AArch64::FMLAv2i32_indexed;
9204 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9206 } else {
9207 Opc = AArch64::FMLAv2f32;
9208 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9210 }
9211 break;
9212
9214 RC = &AArch64::FPR128RegClass;
9215 Opc = AArch64::FMLAv8i16_indexed;
9216 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9218 break;
9220 RC = &AArch64::FPR128RegClass;
9221 Opc = AArch64::FMLAv8f16;
9222 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9224 break;
9226 RC = &AArch64::FPR128RegClass;
9227 Opc = AArch64::FMLAv8i16_indexed;
9228 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9230 break;
9232 RC = &AArch64::FPR128RegClass;
9233 Opc = AArch64::FMLAv8f16;
9234 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9236 break;
9237
9240 RC = &AArch64::FPR128RegClass;
9242 Opc = AArch64::FMLAv2i64_indexed;
9243 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9245 } else {
9246 Opc = AArch64::FMLAv2f64;
9247 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9249 }
9250 break;
9253 RC = &AArch64::FPR128RegClass;
9255 Opc = AArch64::FMLAv2i64_indexed;
9256 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9258 } else {
9259 Opc = AArch64::FMLAv2f64;
9260 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9262 }
9263 break;
9264
9267 RC = &AArch64::FPR128RegClass;
9269 Opc = AArch64::FMLAv4i32_indexed;
9270 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9272 } else {
9273 Opc = AArch64::FMLAv4f32;
9274 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9276 }
9277 break;
9278
9281 RC = &AArch64::FPR128RegClass;
9283 Opc = AArch64::FMLAv4i32_indexed;
9284 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9286 } else {
9287 Opc = AArch64::FMLAv4f32;
9288 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9290 }
9291 break;
9292
9294 Opc = AArch64::FNMSUBHrrr;
9295 RC = &AArch64::FPR16RegClass;
9296 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9297 break;
9299 Opc = AArch64::FNMSUBSrrr;
9300 RC = &AArch64::FPR32RegClass;
9301 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9302 break;
9304 Opc = AArch64::FNMSUBDrrr;
9305 RC = &AArch64::FPR64RegClass;
9306 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9307 break;
9308
9310 Opc = AArch64::FNMADDHrrr;
9311 RC = &AArch64::FPR16RegClass;
9312 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9313 break;
9315 Opc = AArch64::FNMADDSrrr;
9316 RC = &AArch64::FPR32RegClass;
9317 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9318 break;
9320 Opc = AArch64::FNMADDDrrr;
9321 RC = &AArch64::FPR64RegClass;
9322 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9323 break;
9324
9326 Opc = AArch64::FMSUBHrrr;
9327 RC = &AArch64::FPR16RegClass;
9328 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9329 break;
9331 Opc = AArch64::FMSUBSrrr;
9332 RC = &AArch64::FPR32RegClass;
9333 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9334 break;
9336 Opc = AArch64::FMSUBDrrr;
9337 RC = &AArch64::FPR64RegClass;
9338 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9339 break;
9340
9342 Opc = AArch64::FMLSv1i32_indexed;
9343 RC = &AArch64::FPR32RegClass;
9344 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9346 break;
9347
9349 Opc = AArch64::FMLSv1i64_indexed;
9350 RC = &AArch64::FPR64RegClass;
9351 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9353 break;
9354
9357 RC = &AArch64::FPR64RegClass;
9358 Register NewVR = MRI.createVirtualRegister(RC);
9359 MachineInstrBuilder MIB1 =
9360 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9361 .add(Root.getOperand(2));
9362 InsInstrs.push_back(MIB1);
9363 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9365 Opc = AArch64::FMLAv4f16;
9366 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9367 FMAInstKind::Accumulator, &NewVR);
9368 } else {
9369 Opc = AArch64::FMLAv4i16_indexed;
9370 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9371 FMAInstKind::Indexed, &NewVR);
9372 }
9373 break;
9374 }
9376 RC = &AArch64::FPR64RegClass;
9377 Opc = AArch64::FMLSv4f16;
9378 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9380 break;
9382 RC = &AArch64::FPR64RegClass;
9383 Opc = AArch64::FMLSv4i16_indexed;
9384 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9386 break;
9387
9390 RC = &AArch64::FPR64RegClass;
9392 Opc = AArch64::FMLSv2i32_indexed;
9393 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9395 } else {
9396 Opc = AArch64::FMLSv2f32;
9397 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9399 }
9400 break;
9401
9404 RC = &AArch64::FPR128RegClass;
9405 Register NewVR = MRI.createVirtualRegister(RC);
9406 MachineInstrBuilder MIB1 =
9407 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9408 .add(Root.getOperand(2));
9409 InsInstrs.push_back(MIB1);
9410 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9412 Opc = AArch64::FMLAv8f16;
9413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9414 FMAInstKind::Accumulator, &NewVR);
9415 } else {
9416 Opc = AArch64::FMLAv8i16_indexed;
9417 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9418 FMAInstKind::Indexed, &NewVR);
9419 }
9420 break;
9421 }
9423 RC = &AArch64::FPR128RegClass;
9424 Opc = AArch64::FMLSv8f16;
9425 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9427 break;
9429 RC = &AArch64::FPR128RegClass;
9430 Opc = AArch64::FMLSv8i16_indexed;
9431 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9433 break;
9434
9437 RC = &AArch64::FPR128RegClass;
9439 Opc = AArch64::FMLSv2i64_indexed;
9440 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9442 } else {
9443 Opc = AArch64::FMLSv2f64;
9444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9446 }
9447 break;
9448
9451 RC = &AArch64::FPR128RegClass;
9453 Opc = AArch64::FMLSv4i32_indexed;
9454 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9456 } else {
9457 Opc = AArch64::FMLSv4f32;
9458 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9460 }
9461 break;
9464 RC = &AArch64::FPR64RegClass;
9465 Register NewVR = MRI.createVirtualRegister(RC);
9466 MachineInstrBuilder MIB1 =
9467 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9468 .add(Root.getOperand(2));
9469 InsInstrs.push_back(MIB1);
9470 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9472 Opc = AArch64::FMLAv2i32_indexed;
9473 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9474 FMAInstKind::Indexed, &NewVR);
9475 } else {
9476 Opc = AArch64::FMLAv2f32;
9477 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9478 FMAInstKind::Accumulator, &NewVR);
9479 }
9480 break;
9481 }
9484 RC = &AArch64::FPR128RegClass;
9485 Register NewVR = MRI.createVirtualRegister(RC);
9486 MachineInstrBuilder MIB1 =
9487 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9488 .add(Root.getOperand(2));
9489 InsInstrs.push_back(MIB1);
9490 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9492 Opc = AArch64::FMLAv4i32_indexed;
9493 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9494 FMAInstKind::Indexed, &NewVR);
9495 } else {
9496 Opc = AArch64::FMLAv4f32;
9497 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9498 FMAInstKind::Accumulator, &NewVR);
9499 }
9500 break;
9501 }
9504 RC = &AArch64::FPR128RegClass;
9505 Register NewVR = MRI.createVirtualRegister(RC);
9506 MachineInstrBuilder MIB1 =
9507 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9508 .add(Root.getOperand(2));
9509 InsInstrs.push_back(MIB1);
9510 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9512 Opc = AArch64::FMLAv2i64_indexed;
9513 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9514 FMAInstKind::Indexed, &NewVR);
9515 } else {
9516 Opc = AArch64::FMLAv2f64;
9517 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9518 FMAInstKind::Accumulator, &NewVR);
9519 }
9520 break;
9521 }
9524 unsigned IdxDupOp =
9526 : 2;
9527 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9528 &AArch64::FPR128RegClass, MRI);
9529 break;
9530 }
9533 unsigned IdxDupOp =
9535 : 2;
9536 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9537 &AArch64::FPR128RegClass, MRI);
9538 break;
9539 }
9542 unsigned IdxDupOp =
9544 : 2;
9545 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9546 &AArch64::FPR128_loRegClass, MRI);
9547 break;
9548 }
9551 unsigned IdxDupOp =
9553 : 2;
9554 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9555 &AArch64::FPR128RegClass, MRI);
9556 break;
9557 }
9560 unsigned IdxDupOp =
9562 : 2;
9563 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9564 &AArch64::FPR128_loRegClass, MRI);
9565 break;
9566 }
9568 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9569 break;
9570 }
9572 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9573 Pattern, 4);
9574 break;
9575 }
9577 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9578 Pattern, 8);
9579 break;
9580 }
9582 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9583 Pattern, 16);
9584 break;
9585 }
9586
9587 } // end switch (Pattern)
9588 // Record MUL and ADD/SUB for deletion
9589 if (MUL)
9590 DelInstrs.push_back(MUL);
9591 DelInstrs.push_back(&Root);
9592
9593 // Set the flags on the inserted instructions to be the merged flags of the
9594 // instructions that we have combined.
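 // For example (illustrative), if both the multiply and the add/sub carry the
 // 'contract' fast-math flag, the fused instruction built above keeps it too.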
9595 uint32_t Flags = Root.getFlags();
9596 if (MUL)
9597 Flags = Root.mergeFlagsWith(*MUL);
9598 for (auto *MI : InsInstrs)
9599 MI->setFlags(Flags);
9600}
9601
9602/// Replace a csinc-branch sequence by a simple conditional branch
9603///
9604/// Examples:
9605/// 1. \code
9606/// csinc w9, wzr, wzr, <condition code>
9607/// tbnz w9, #0, 0x44
9608/// \endcode
9609/// to
9610/// \code
9611/// b.<inverted condition code>
9612/// \endcode
9613///
9614/// 2. \code
9615/// csinc w9, wzr, wzr, <condition code>
9616/// tbz w9, #0, 0x44
9617/// \endcode
9618/// to
9619/// \code
9620/// b.<condition code>
9621/// \endcode
9622///
9623/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
9624/// compare's constant operand is a power of 2.
9625///
9626/// Examples:
9627/// \code
9628/// and w8, w8, #0x400
9629/// cbnz w8, L1
9630/// \endcode
9631/// to
9632/// \code
9633/// tbnz w8, #10, L1
9634/// \endcode
9635///
9636/// \param MI Conditional Branch
9637/// \return True when the simple conditional branch is generated
9638///
9640 bool IsNegativeBranch = false;
9641 bool IsTestAndBranch = false;
9642 unsigned TargetBBInMI = 0;
9643 switch (MI.getOpcode()) {
9644 default:
9645 llvm_unreachable("Unknown branch instruction?");
9646 case AArch64::Bcc:
9647 case AArch64::CBWPri:
9648 case AArch64::CBXPri:
9649 case AArch64::CBBAssertExt:
9650 case AArch64::CBHAssertExt:
9651 case AArch64::CBWPrr:
9652 case AArch64::CBXPrr:
9653 return false;
9654 case AArch64::CBZW:
9655 case AArch64::CBZX:
9656 TargetBBInMI = 1;
9657 break;
9658 case AArch64::CBNZW:
9659 case AArch64::CBNZX:
9660 TargetBBInMI = 1;
9661 IsNegativeBranch = true;
9662 break;
9663 case AArch64::TBZW:
9664 case AArch64::TBZX:
9665 TargetBBInMI = 2;
9666 IsTestAndBranch = true;
9667 break;
9668 case AArch64::TBNZW:
9669 case AArch64::TBNZX:
9670 TargetBBInMI = 2;
9671 IsNegativeBranch = true;
9672 IsTestAndBranch = true;
9673 break;
9674 }
9675 // So we increment a zero register and test for bits other
9676 // than bit 0? Conservatively bail out in case the verifier
9677 // missed this case.
9678 if (IsTestAndBranch && MI.getOperand(1).getImm())
9679 return false;
9680
9681 // Find Definition.
9682 assert(MI.getParent() && "Incomplete machine instruction\n");
9683 MachineBasicBlock *MBB = MI.getParent();
9684 MachineFunction *MF = MBB->getParent();
9686 Register VReg = MI.getOperand(0).getReg();
9687 if (!VReg.isVirtual())
9688 return false;
9689
9690 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9691
9692 // Look through COPY instructions to find definition.
9693 while (DefMI->isCopy()) {
9694 Register CopyVReg = DefMI->getOperand(1).getReg();
9695 if (!MRI->hasOneNonDBGUse(CopyVReg))
9696 return false;
9697 if (!MRI->hasOneDef(CopyVReg))
9698 return false;
9699 DefMI = MRI->getVRegDef(CopyVReg);
9700 }
9701
9702 switch (DefMI->getOpcode()) {
9703 default:
9704 return false;
9705 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9706 case AArch64::ANDWri:
9707 case AArch64::ANDXri: {
9708 if (IsTestAndBranch)
9709 return false;
9710 if (DefMI->getParent() != MBB)
9711 return false;
9712 if (!MRI->hasOneNonDBGUse(VReg))
9713 return false;
9714
9715 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9717 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9718 if (!isPowerOf2_64(Mask))
9719 return false;
9720
9721 MachineOperand &MO = DefMI->getOperand(1);
9722 Register NewReg = MO.getReg();
9723 if (!NewReg.isVirtual())
9724 return false;
9725
9726 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9727
9728 MachineBasicBlock &RefToMBB = *MBB;
9729 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9730 DebugLoc DL = MI.getDebugLoc();
9731 unsigned Imm = Log2_64(Mask);
9732 unsigned Opc = (Imm < 32)
9733 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9734 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9735 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9736 .addReg(NewReg)
9737 .addImm(Imm)
9738 .addMBB(TBB);
9739 // Register lives on to the TB(N)Z now.
9740 MO.setIsKill(false);
9741
9742 // For immediates smaller than 32, we must use the 32-bit
9743 // variant (W) in all cases, since the 64-bit variant cannot
9744 // encode them.
9745 // Therefore, if the input register is 64-bit, we take its
9746 // 32-bit sub-register.
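 // For example (illustrative): "and x8, x9, #0x8" followed by "cbnz x8, L1"
 // becomes "tbnz w9, #3, L1", referring to the 32-bit sub-register since
 // bit 3 is below 32.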
9747 if (!Is32Bit && Imm < 32)
9748 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9749 MI.eraseFromParent();
9750 return true;
9751 }
9752 // Look for CSINC
9753 case AArch64::CSINCWr:
9754 case AArch64::CSINCXr: {
9755 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9756 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9757 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9758 DefMI->getOperand(2).getReg() == AArch64::XZR))
9759 return false;
9760
9761 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9762 true) != -1)
9763 return false;
9764
9765 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9766 // Convert only when the condition code is not modified between
9767 // the CSINC and the branch. The CC may be used by other
9768 // instructions in between.
9770 return false;
9771 MachineBasicBlock &RefToMBB = *MBB;
9772 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9773 DebugLoc DL = MI.getDebugLoc();
9774 if (IsNegativeBranch)
9776 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9777 MI.eraseFromParent();
9778 return true;
9779 }
9780 }
9781}
9782
9783std::pair<unsigned, unsigned>
9784AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9785 const unsigned Mask = AArch64II::MO_FRAGMENT;
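 // For example (illustrative), TF == (MO_PAGEOFF | MO_NC) decomposes into the
 // direct flag MO_PAGEOFF (the MO_FRAGMENT part) and the bitmask flag MO_NC.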
9786 return std::make_pair(TF & Mask, TF & ~Mask);
9787}
9788
9790AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9791 using namespace AArch64II;
9792
9793 static const std::pair<unsigned, const char *> TargetFlags[] = {
9794 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9795 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9796 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9797 {MO_HI12, "aarch64-hi12"}};
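 // These names appear in printed MIR, e.g. (illustrative):
 //   %0:gpr64 = ADRP target-flags(aarch64-page) @var
 //   %1:gpr64sp = ADDXri %0, target-flags(aarch64-pageoff, aarch64-nc) @var, 0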
9798 return ArrayRef(TargetFlags);
9799}
9800
9802AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9803 using namespace AArch64II;
9804
9805 static const std::pair<unsigned, const char *> TargetFlags[] = {
9806 {MO_COFFSTUB, "aarch64-coffstub"},
9807 {MO_GOT, "aarch64-got"},
9808 {MO_NC, "aarch64-nc"},
9809 {MO_S, "aarch64-s"},
9810 {MO_TLS, "aarch64-tls"},
9811 {MO_DLLIMPORT, "aarch64-dllimport"},
9812 {MO_PREL, "aarch64-prel"},
9813 {MO_TAGGED, "aarch64-tagged"},
9814 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9815 };
9816 return ArrayRef(TargetFlags);
9817}
9818
9820AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9821 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9822 {{MOSuppressPair, "aarch64-suppress-pair"},
9823 {MOStridedAccess, "aarch64-strided-access"}};
9824 return ArrayRef(TargetFlags);
9825}
9826
9827/// Constants defining how certain sequences should be outlined.
9828/// This encompasses how an outlined function should be called, and what kind of
9829/// frame should be emitted for that outlined function.
9830///
9831/// \p MachineOutlinerDefault implies that the function should be called with
9832/// a save and restore of LR to the stack.
9833///
9834/// That is,
9835///
9836/// I1 Save LR OUTLINED_FUNCTION:
9837/// I2 --> BL OUTLINED_FUNCTION I1
9838/// I3 Restore LR I2
9839/// I3
9840/// RET
9841///
9842/// * Call construction overhead: 3 (save + BL + restore)
9843/// * Frame construction overhead: 1 (ret)
9844/// * Requires stack fixups? Yes
9845///
9846/// \p MachineOutlinerTailCall implies that the function is being created from
9847/// a sequence of instructions ending in a return.
9848///
9849/// That is,
9850///
9851/// I1 OUTLINED_FUNCTION:
9852/// I2 --> B OUTLINED_FUNCTION I1
9853/// RET I2
9854/// RET
9855///
9856/// * Call construction overhead: 1 (B)
9857/// * Frame construction overhead: 0 (Return included in sequence)
9858/// * Requires stack fixups? No
9859///
9860/// \p MachineOutlinerNoLRSave implies that the function should be called using
9861/// a BL instruction, but doesn't require LR to be saved and restored. This
9862/// happens when LR is known to be dead.
9863///
9864/// That is,
9865///
9866/// I1 OUTLINED_FUNCTION:
9867/// I2 --> BL OUTLINED_FUNCTION I1
9868/// I3 I2
9869/// I3
9870/// RET
9871///
9872/// * Call construction overhead: 1 (BL)
9873/// * Frame construction overhead: 1 (RET)
9874/// * Requires stack fixups? No
9875///
9876/// \p MachineOutlinerThunk implies that the function is being created from
9877/// a sequence of instructions ending in a call. The outlined function is
9878/// called with a BL instruction, and the outlined function tail-calls the
9879/// original call destination.
9880///
9881/// That is,
9882///
9883/// I1 OUTLINED_FUNCTION:
9884/// I2 --> BL OUTLINED_FUNCTION I1
9885/// BL f I2
9886/// B f
9887/// * Call construction overhead: 1 (BL)
9888/// * Frame construction overhead: 0
9889/// * Requires stack fixups? No
9890///
9891/// \p MachineOutlinerRegSave implies that the function should be called with a
9892/// save and restore of LR to an available register. This allows us to avoid
9893/// stack fixups. Note that this outlining variant is compatible with the
9894/// NoLRSave case.
9895///
9896/// That is,
9897///
9898/// I1 Save LR OUTLINED_FUNCTION:
9899/// I2 --> BL OUTLINED_FUNCTION I1
9900/// I3 Restore LR I2
9901/// I3
9902/// RET
9903///
9904/// * Call construction overhead: 3 (save + BL + restore)
9905/// * Frame construction overhead: 1 (ret)
9906/// * Requires stack fixups? No
9908 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9909 MachineOutlinerTailCall, /// Only emit a branch.
9910 MachineOutlinerNoLRSave, /// Emit a call and return.
9911 MachineOutlinerThunk, /// Emit a call and tail-call.
9912 MachineOutlinerRegSave /// Same as default, but save to a register.
9913};
9914
9920
9922AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9923 MachineFunction *MF = C.getMF();
9924 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9925 const AArch64RegisterInfo *ARI =
9926 static_cast<const AArch64RegisterInfo *>(&TRI);
9927 // Check if there is an available register across the sequence that we can
9928 // use.
9929 for (unsigned Reg : AArch64::GPR64RegClass) {
9930 if (!ARI->isReservedReg(*MF, Reg) &&
9931 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9932 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9933 Reg != AArch64::X17 && // Ditto for X17.
9934 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9935 C.isAvailableInsideSeq(Reg, TRI))
9936 return Reg;
9937 }
9938 return Register();
9939}
9940
9941static bool
9943 const outliner::Candidate &b) {
9944 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9945 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9946
9947 return MFIa->getSignReturnAddressCondition() ==
9949}
9950
9951static bool
9953 const outliner::Candidate &b) {
9954 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9955 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9956
9957 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9958}
9959
9961 const outliner::Candidate &b) {
9962 const AArch64Subtarget &SubtargetA =
9964 const AArch64Subtarget &SubtargetB =
9965 b.getMF()->getSubtarget<AArch64Subtarget>();
9966 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9967}
9968
9969std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9970AArch64InstrInfo::getOutliningCandidateInfo(
9971 const MachineModuleInfo &MMI,
9972 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9973 unsigned MinRepeats) const {
9974 unsigned SequenceSize = 0;
9975 for (auto &MI : RepeatedSequenceLocs[0])
9976 SequenceSize += getInstSizeInBytes(MI);
9977
9978 unsigned NumBytesToCreateFrame = 0;
9979
9980 // Avoid splitting an ADRP and its paired ADD/LDR into different outlined
9981 // functions; these instructions are fused together by the scheduler.
9982 // Any candidate where ADRP is the last instruction should be rejected,
9983 // as that would split the ADRP pair.
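 // For example (illustrative), a GOT access such as:
 //   adrp x8, :got:sym
 //   ldr  x8, [x8, :got_lo12:sym]
 // must stay together; ending a candidate on the ADRP (or starting one on the
 // LDR) would break the pair apart.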
9984 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
9985 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
9986 if (LastMI.getOpcode() == AArch64::ADRP &&
9987 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
9988 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9989 return std::nullopt;
9990 }
9991
9992 // Similarly, any candidate where the first instruction is an ADD/LDR with
9993 // a page offset should be rejected, to avoid splitting an ADRP pair.
9994 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
9995 FirstMI.getOpcode() == AArch64::LDRXui) &&
9996 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
9997 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9998 return std::nullopt;
9999 }
10000
10001 // We only allow outlining for functions having exactly matching return
10002 // address signing attributes, i.e., all share the same value for the
10003 // attribute "sign-return-address" and all share the same type of key they
10004 // are signed with.
10005 // Additionally we require all functions to simultaneously either support
10006 // v8.3a features or not. Otherwise an outlined function could get signed
10007 // using dedicated v8.3 instructions and a call from a function that doesn't
10008 // support v8.3 instructions would therefore be invalid.
10009 if (std::adjacent_find(
10010 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
10011 [](const outliner::Candidate &a, const outliner::Candidate &b) {
10012 // Return true if a and b are non-equal w.r.t. return address
10013 // signing or support of v8.3a features
10014 if (outliningCandidatesSigningScopeConsensus(a, b) &&
10015 outliningCandidatesSigningKeyConsensus(a, b) &&
10016 outliningCandidatesV8_3OpsConsensus(a, b)) {
10017 return false;
10018 }
10019 return true;
10020 }) != RepeatedSequenceLocs.end()) {
10021 return std::nullopt;
10022 }
10023
10024 // Since at this point all candidates agree on their return address signing,
10025 // picking just one is fine. If the candidate functions potentially sign their
10026 // return addresses, the outlined function should do the same. Note that in
10027 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
10028 // not certainly true that the outlined function will have to sign its return
10029 // address but this decision is made later, when the decision to outline
10030 // has already been made.
10031 // The same holds for the number of additional instructions we need: On
10032 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
10033 // necessary. However, at this point we don't know if the outlined function
10034 // will have a RET instruction so we assume the worst.
10035 const TargetRegisterInfo &TRI = getRegisterInfo();
10036 // Performing a tail call may require extra checks when PAuth is enabled.
10037 // If PAuth is disabled, set it to zero for uniformity.
10038 unsigned NumBytesToCheckLRInTCEpilogue = 0;
10039 const auto RASignCondition = RepeatedSequenceLocs[0]
10040 .getMF()
10041 ->getInfo<AArch64FunctionInfo>()
10042 ->getSignReturnAddressCondition();
10043 if (RASignCondition != SignReturnAddress::None) {
10044 // One PAC and one AUT instruction.
10045 NumBytesToCreateFrame += 8;
10046
10047 // PAuth is enabled - set extra tail call cost, if any.
10048 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
10049 *RepeatedSequenceLocs[0].getMF());
10050 NumBytesToCheckLRInTCEpilogue =
10052 // Checking the authenticated LR value may significantly impact
10053 // SequenceSize, so account for it for more precise results.
10054 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
10055 SequenceSize += NumBytesToCheckLRInTCEpilogue;
10056
10057 // We have to check if SP-modifying instructions would get outlined.
10058 // If so, we only allow outlining if SP is unchanged overall: matching
10059 // sub and add instructions are okay to outline, but all other SP
10060 // modifications are not.
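 // For example (illustrative), a candidate containing the balanced pair
 //   sub sp, sp, #16 ... add sp, sp, #16
 // may be outlined, whereas an unmatched "sub sp, sp, #16" (or any other
 // write to SP) disqualifies it.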
10061 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
10062 int SPValue = 0;
10063 for (auto &MI : C) {
10064 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
10065 switch (MI.getOpcode()) {
10066 case AArch64::ADDXri:
10067 case AArch64::ADDWri:
10068 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10069 assert(MI.getOperand(2).isImm() &&
10070 "Expected operand to be immediate");
10071 assert(MI.getOperand(1).isReg() &&
10072 "Expected operand to be a register");
10073 // Check if the add just increments sp. If so, we search for
10074 // matching sub instructions that decrement sp. If not, the
10075 // modification is illegal
10076 if (MI.getOperand(1).getReg() == AArch64::SP)
10077 SPValue += MI.getOperand(2).getImm();
10078 else
10079 return true;
10080 break;
10081 case AArch64::SUBXri:
10082 case AArch64::SUBWri:
10083 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
10084 assert(MI.getOperand(2).isImm() &&
10085 "Expected operand to be immediate");
10086 assert(MI.getOperand(1).isReg() &&
10087 "Expected operand to be a register");
10088 // Check if the sub just decrements sp. If so, we search for
10089 // matching add instructions that increment sp. If not, the
10090 // modification is illegal
10091 if (MI.getOperand(1).getReg() == AArch64::SP)
10092 SPValue -= MI.getOperand(2).getImm();
10093 else
10094 return true;
10095 break;
10096 default:
10097 return true;
10098 }
10099 }
10100 }
10101 if (SPValue)
10102 return true;
10103 return false;
10104 };
10105 // Remove candidates with illegal stack modifying instructions
10106 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
10107
10108 // If the sequence doesn't have enough candidates left, then we're done.
10109 if (RepeatedSequenceLocs.size() < MinRepeats)
10110 return std::nullopt;
10111 }
10112
10113 // Properties about candidate MBBs that hold for all of them.
10114 unsigned FlagsSetInAll = 0xF;
10115
10116 // Compute liveness information for each candidate, and set FlagsSetInAll.
10117 for (outliner::Candidate &C : RepeatedSequenceLocs)
10118 FlagsSetInAll &= C.Flags;
10119
10120 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
10121
10122 // Helper lambda which sets call information for every candidate.
10123 auto SetCandidateCallInfo =
10124 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
10125 for (outliner::Candidate &C : RepeatedSequenceLocs)
10126 C.setCallInfo(CallID, NumBytesForCall);
10127 };
10128
10129 unsigned FrameID = MachineOutlinerDefault;
10130 NumBytesToCreateFrame += 4;
10131
10132 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
10133 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
10134 });
10135
10136 // We check to see if CFI instructions are present, and if they are,
10137 // we count the CFI instructions in the candidates.
10138 unsigned CFICount = 0;
10139 for (auto &I : RepeatedSequenceLocs[0]) {
10140 if (I.isCFIInstruction())
10141 CFICount++;
10142 }
10143
10144 // We compare the number of CFI instructions found to the number of CFI
10145 // instructions in the parent function for each candidate. We must check this
10146 // since if we outline one of the CFI instructions in a function, we have to
10147 // outline them all for correctness. If we do not, the address offsets will be
10148 // incorrect between the two sections of the program.
10149 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10150 std::vector<MCCFIInstruction> CFIInstructions =
10151 C.getMF()->getFrameInstructions();
10152
10153 if (CFICount > 0 && CFICount != CFIInstructions.size())
10154 return std::nullopt;
10155 }
10156
10157 // Returns true if an instruction is safe to fix up, false otherwise.
10158 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10159 if (MI.isCall())
10160 return true;
10161
10162 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10163 !MI.readsRegister(AArch64::SP, &TRI))
10164 return true;
10165
10166 // Any modification of SP will break our code to save/restore LR.
10167 // FIXME: We could handle some instructions which add a constant
10168 // offset to SP, with a bit more work.
10169 if (MI.modifiesRegister(AArch64::SP, &TRI))
10170 return false;
10171
10172 // At this point, we have a stack instruction that we might need to
10173 // fix up. We'll handle it if it's a load or store.
10174 if (MI.mayLoadOrStore()) {
10175 const MachineOperand *Base; // Filled with the base operand of MI.
10176 int64_t Offset; // Filled with the offset of MI.
10177 bool OffsetIsScalable;
10178
10179 // Does it allow us to offset the base operand and is the base the
10180 // register SP?
10181 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10182 !Base->isReg() || Base->getReg() != AArch64::SP)
10183 return false;
10184
10185 // The fix-up code below assumes byte offsets.
10186 if (OffsetIsScalable)
10187 return false;
10188
10189 // Find the minimum/maximum offset for this instruction and check
10190 // if fixing it up would be in range.
10191 int64_t MinOffset,
10192 MaxOffset; // Unscaled offsets for the instruction.
10193 // The scale to multiply the offsets by.
10194 TypeSize Scale(0U, false), DummyWidth(0U, false);
10195 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10196
10197 Offset += 16; // Update the offset to what it would be if we outlined.
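 // For example (illustrative), "ldr x0, [sp, #8]" in the outlined body
 // would become "ldr x0, [sp, #24]" once LR is spilled with a 16-byte SP
 // adjustment, so the adjusted offset must still be encodable.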
10198 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10199 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10200 return false;
10201
10202 // It's in range, so we can outline it.
10203 return true;
10204 }
10205
10206 // FIXME: Add handling for instructions like "add x0, sp, #8".
10207
10208 // We can't fix it up, so don't outline it.
10209 return false;
10210 };
10211
10212 // True if it's possible to fix up each stack instruction in this sequence.
10213 // Important for frames/call variants that modify the stack.
10214 bool AllStackInstrsSafe =
10215 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10216
10217 // If the last instruction in any candidate is a terminator, then we should
10218 // tail call all of the candidates.
10219 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10220 FrameID = MachineOutlinerTailCall;
10221 NumBytesToCreateFrame = 0;
10222 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10223 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10224 }
10225
10226 else if (LastInstrOpcode == AArch64::BL ||
10227 ((LastInstrOpcode == AArch64::BLR ||
10228 LastInstrOpcode == AArch64::BLRNoIP) &&
10229 !HasBTI)) {
10230 // FIXME: Do we need to check if the code after this uses the value of LR?
10231 FrameID = MachineOutlinerThunk;
10232 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10233 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10234 }
10235
10236 else {
10237 // We need to decide how to emit calls + frames. We can always emit the same
10238 // frame if we don't need to save to the stack. If we have to save to the
10239 // stack, then we need a different frame.
10240 unsigned NumBytesNoStackCalls = 0;
10241 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10242
10243 // Check if we have to save LR.
10244 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10245 bool LRAvailable =
10247 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10248 : true;
10249 // If we have a noreturn caller, then we're going to be conservative and
10250 // say that we have to save LR. If we don't have a ret at the end of the
10251 // block, then we can't reason about liveness accurately.
10252 //
10253 // FIXME: We can probably do better than always disabling this in
10254 // noreturn functions by fixing up the liveness info.
10255 bool IsNoReturn =
10256 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10257
10258 // Is LR available? If so, we don't need a save.
10259 if (LRAvailable && !IsNoReturn) {
10260 NumBytesNoStackCalls += 4;
10261 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10262 CandidatesWithoutStackFixups.push_back(C);
10263 }
10264
10265 // Is an unused register available? If so, we won't modify the stack, so
10266 // we can outline with the same frame type as those that don't save LR.
10267 else if (findRegisterToSaveLRTo(C)) {
10268 NumBytesNoStackCalls += 12;
10269 C.setCallInfo(MachineOutlinerRegSave, 12);
10270 CandidatesWithoutStackFixups.push_back(C);
10271 }
10272
10273 // Is SP used in the sequence at all? If not, we don't have to modify
10274 // the stack, so we are guaranteed to get the same frame.
10275 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10276 NumBytesNoStackCalls += 12;
10277 C.setCallInfo(MachineOutlinerDefault, 12);
10278 CandidatesWithoutStackFixups.push_back(C);
10279 }
10280
10281 // If we outline this, we need to modify the stack. Pretend we don't
10282 // outline this by saving all of its bytes.
10283 else {
10284 NumBytesNoStackCalls += SequenceSize;
10285 }
10286 }
10287
10288 // If there are no places where we have to save LR, then note that we
10289 // don't have to update the stack. Otherwise, give every candidate the
10290 // default call type, as long as it's safe to do so.
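 // For example (illustrative), with three candidates the all-default cost is
 // 3 * 12 bytes of call overhead; if the summed per-candidate costs without
 // stack fixups do not exceed that, the cheaper variants are preferred.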
10291 if (!AllStackInstrsSafe ||
10292 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10293 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10294 FrameID = MachineOutlinerNoLRSave;
10295 if (RepeatedSequenceLocs.size() < MinRepeats)
10296 return std::nullopt;
10297 } else {
10298 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10299
10300 // Bugzilla ID: 46767
10301 // TODO: Check if fixing up the stack more than once is safe so we can
10302 // outline these.
10303 //
10304 // An outline resulting in a caller that requires stack fixups at the
10305 // callsite to a callee that also requires stack fixups can happen when
10306 // there are no available registers at the candidate callsite for a
10307 // candidate that itself also has calls.
10308 //
10309 // In other words, if function_containing_sequence in the following pseudo
10310 // assembly requires that we save LR at the point of the call, but there
10311 // are no available registers, we save using SP, and as a result the SP
10312 // offsets require stack fixups by multiples of 16.
10313 //
10314 // function_containing_sequence:
10315 // ...
10316 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10317 // call OUTLINED_FUNCTION_N
10318 // restore LR from SP
10319 // ...
10320 //
10321 // OUTLINED_FUNCTION_N:
10322 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10323 // ...
10324 // bl foo
10325 // restore LR from SP
10326 // ret
10327 //
10328 // Because the code to handle more than one stack fixup does not
10329 // currently have the proper checks for legality, these cases will assert
10330 // in the AArch64 MachineOutliner. This is because the code to do this
10331 // needs more hardening, testing, better checks that generated code is
10332 // legal, etc and because it is only verified to handle a single pass of
10333 // stack fixup.
10334 //
10335 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10336 // these cases until they are known to be handled. Bugzilla 46767 is
10337 // referenced in comments at the assert site.
10338 //
10339 // To avoid asserting (or generating non-legal code on noassert builds)
10340 // we remove all candidates which would need more than one stack fixup by
10341 // pruning the cases where the candidate has calls while also having no
10342 // available LR and having no available general purpose registers to copy
10343 // LR to (i.e., one extra stack save/restore).
10344 //
10345 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10346 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10347 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10348 return (llvm::any_of(C, IsCall)) &&
10349 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10350 !findRegisterToSaveLRTo(C));
10351 });
10352 }
10353 }
10354
10355 // If we dropped all of the candidates, bail out here.
10356 if (RepeatedSequenceLocs.size() < MinRepeats)
10357 return std::nullopt;
10358 }
10359
10360 // Does every candidate's MBB contain a call? If so, then we might have a call
10361 // in the range.
10362 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10363 // Check if the range contains a call. These require a save + restore of the
10364 // link register.
10365 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10366 bool ModStackToSaveLR = false;
10367 if (any_of(drop_end(FirstCand),
10368 [](const MachineInstr &MI) { return MI.isCall(); }))
10369 ModStackToSaveLR = true;
10370
10371 // Handle the last instruction separately. If this is a tail call, then the
10372 // last instruction is a call. We don't want to save + restore in this case.
10373 // However, it could be possible that the last instruction is a call without
10374 // it being valid to tail call this sequence. We should consider this as
10375 // well.
10376 else if (FrameID != MachineOutlinerThunk &&
10377 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10378 ModStackToSaveLR = true;
10379
10380 if (ModStackToSaveLR) {
10381 // We can't fix up the stack. Bail out.
10382 if (!AllStackInstrsSafe)
10383 return std::nullopt;
10384
10385 // Save + restore LR.
10386 NumBytesToCreateFrame += 8;
10387 }
10388 }
10389
10390 // If we have CFI instructions, we can only outline if the outlined section
10391 // can be a tail call.
10392 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10393 return std::nullopt;
10394
10395 return std::make_unique<outliner::OutlinedFunction>(
10396 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10397}
10398
10399void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10400 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10401 // If a bunch of candidates reach this point, they must agree on their return
10402 // address signing. It is therefore enough to just consider the signing
10403 // behaviour of one of them.
10404 const auto &CFn = Candidates.front().getMF()->getFunction();
10405
10406 if (CFn.hasFnAttribute("ptrauth-returns"))
10407 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10408 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10409 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10410 // Since all candidates belong to the same module, just copy the
10411 // function-level attributes of an arbitrary function.
10412 if (CFn.hasFnAttribute("sign-return-address"))
10413 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10414 if (CFn.hasFnAttribute("sign-return-address-key"))
10415 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10416
10417 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10418}
10419
10420bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10421 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10422 const Function &F = MF.getFunction();
10423
10424 // Can F be deduplicated by the linker? If it can, don't outline from it.
10425 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10426 return false;
10427
10428 // Don't outline from functions with section markings; the program could
10429 // expect that all the code is in the named section.
10430 // FIXME: Allow outlining from multiple functions with the same section
10431 // marking.
10432 if (F.hasSection())
10433 return false;
10434
10435 // Outlining from functions with redzones is unsafe since the outliner may
10436 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10437 // outline from it.
10438 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10439 if (!AFI || AFI->hasRedZone().value_or(true))
10440 return false;
10441
10442 // FIXME: Determine whether it is safe to outline from functions which contain
10443 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10444 // outlined together and ensure it is safe to outline with async unwind info,
10445 // required for saving & restoring VG around calls.
10446 if (AFI->hasStreamingModeChanges())
10447 return false;
10448
10449 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10451 return false;
10452
10453 // It's safe to outline from MF.
10454 return true;
10455}
10456
10458AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10459 unsigned &Flags) const {
10461 "Must track liveness!");
10463 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10464 Ranges;
10465 // According to the AArch64 Procedure Call Standard, the following are
10466 // undefined on entry/exit from a function call:
10467 //
10468 // * Registers x16, x17, (and thus w16, w17)
10469 // * Condition codes (and thus the NZCV register)
10470 //
10471 // If any of these registers are used inside or live across an outlined
10472 // function, then they may be modified later, either by the compiler or
10473 // some other tool (like the linker).
10474 //
10475 // To avoid outlining in these situations, partition each block into ranges
10476 // where these registers are dead. We will only outline from those ranges.
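 // For example (illustrative), in a block like
 //   ldr x0, [x1] <- outlinable range
 //   mov x16, x2 <- x16 becomes live here...
 //   blr x16 <- ...and is read here; not outlinable
 //   str x0, [x1] <- outlinable range
 // we would produce two separate candidate ranges around the x16 region.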
10477 LiveRegUnits LRU(getRegisterInfo());
10478 auto AreAllUnsafeRegsDead = [&LRU]() {
10479 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10480 LRU.available(AArch64::NZCV);
10481 };
10482
10483 // We need to know if LR is live across an outlining boundary later on in
10484 // order to decide how we'll create the outlined call, frame, etc.
10485 //
10486 // It's pretty expensive to check this for *every candidate* within a block.
10487 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10488 // to compute liveness from the end of the block for O(n) candidates within
10489 // the block.
10490 //
10491 // So, to improve the average case, let's keep track of liveness from the end
10492 // of the block to the beginning of *every outlinable range*. If we know that
10493 // LR is available in every range we could outline from, then we know that
10494 // we don't need to check liveness for any candidate within that range.
10495 bool LRAvailableEverywhere = true;
10496 // Compute liveness bottom-up.
10497 LRU.addLiveOuts(MBB);
10498 // Update flags that require info about the entire MBB.
10499 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10500 if (MI.isCall() && !MI.isTerminator())
10502 };
10503 // Range: [RangeBegin, RangeEnd)
10504 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10505 unsigned RangeLen;
10506 auto CreateNewRangeStartingAt =
10507 [&RangeBegin, &RangeEnd,
10508 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10509 RangeBegin = NewBegin;
10510 RangeEnd = std::next(RangeBegin);
10511 RangeLen = 0;
10512 };
10513 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10514 // At least one unsafe register is not dead. We do not want to outline at
10515 // this point. If it is long enough to outline from and does not cross a
10516 // bundle boundary, save the range [RangeBegin, RangeEnd).
10517 if (RangeLen <= 1)
10518 return;
10519 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10520 return;
10521 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10522 return;
10523 Ranges.emplace_back(RangeBegin, RangeEnd);
10524 };
10525 // Find the first point where all unsafe registers are dead.
10526 // FIND: <safe instr> <-- end of first potential range
10527 // SKIP: <unsafe def>
10528 // SKIP: ... everything between ...
10529 // SKIP: <unsafe use>
10530 auto FirstPossibleEndPt = MBB.instr_rbegin();
10531 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10532 LRU.stepBackward(*FirstPossibleEndPt);
10533 // Update flags that impact how we outline across the entire block,
10534 // regardless of safety.
10535 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10536 if (AreAllUnsafeRegsDead())
10537 break;
10538 }
10539 // If we exhausted the entire block, we have no safe ranges to outline.
10540 if (FirstPossibleEndPt == MBB.instr_rend())
10541 return Ranges;
10542 // Current range.
10543 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10544 // FirstPossibleEndPt points to the first place where all unsafe registers
10545 // are dead (if there is any such point). Begin partitioning the MBB into
10546 // ranges.
10547 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10548 LRU.stepBackward(MI);
10549 UpdateWholeMBBFlags(MI);
10550 if (!AreAllUnsafeRegsDead()) {
10551 SaveRangeIfNonEmpty();
10552 CreateNewRangeStartingAt(MI.getIterator());
10553 continue;
10554 }
10555 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10556 RangeBegin = MI.getIterator();
10557 ++RangeLen;
10558 }
10559 // The above loop misses the last (or only) range. If we are still safe,
10560 // save that range as well.
10561 if (AreAllUnsafeRegsDead())
10562 SaveRangeIfNonEmpty();
10563 if (Ranges.empty())
10564 return Ranges;
10565 // We found the ranges bottom-up, but the mapping expects them top-down.
10566 // Reverse the order.
10567 std::reverse(Ranges.begin(), Ranges.end());
10568 // If there is at least one outlinable range where LR is unavailable
10569 // somewhere, remember that.
10570 if (!LRAvailableEverywhere)
10571 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10572 return Ranges;
10573}
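// --- Editor's note (illustrative sketch, not part of the original source) ---
// Rough intuition for the partitioning above, on an invented block:
//
//   add  x0, x0, #1    ; w16/w17/nzcv dead   -> outlinable range A
//   ldr  x1, [x2]      ; w16/w17/nzcv dead   -> outlinable range A
//   adrp x16, sym      ; x16 live from here  -> not offered to the outliner
//   ldr  x16, [x16, :lo12:sym]
//   blr  x16
//   ldr  x3, [x4]      ; registers dead again -> outlinable range B
//
// Only the sub-ranges where x16/x17/nzcv are dead are handed to the outliner;
// the registers and instructions shown here are invented for illustration.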
10574
10575outliner::InstrType
10576AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10577 MachineBasicBlock::iterator &MIT,
10578 unsigned Flags) const {
10579 MachineInstr &MI = *MIT;
10580
10581 // Don't outline anything used for return address signing. The outlined
10582 // function will get signed later if needed
10583 switch (MI.getOpcode()) {
10584 case AArch64::PACM:
10585 case AArch64::PACIASP:
10586 case AArch64::PACIBSP:
10587 case AArch64::PACIASPPC:
10588 case AArch64::PACIBSPPC:
10589 case AArch64::AUTIASP:
10590 case AArch64::AUTIBSP:
10591 case AArch64::AUTIASPPCi:
10592 case AArch64::AUTIASPPCr:
10593 case AArch64::AUTIBSPPCi:
10594 case AArch64::AUTIBSPPCr:
10595 case AArch64::RETAA:
10596 case AArch64::RETAB:
10597 case AArch64::RETAASPPCi:
10598 case AArch64::RETAASPPCr:
10599 case AArch64::RETABSPPCi:
10600 case AArch64::RETABSPPCr:
10601 case AArch64::EMITBKEY:
10602 case AArch64::PAUTH_PROLOGUE:
10603 case AArch64::PAUTH_EPILOGUE:
10604 return outliner::InstrType::Illegal;
10605 }
10606
10607 // We can only outline these if we will tail call the outlined function, or
10608 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10609 // in a tail call.
10610 //
10611 // FIXME: If the proper fixups for the offset are implemented, this should be
10612 // possible.
10613 if (MI.isCFIInstruction())
10614 return outliner::InstrType::Legal;
10615
10616 // Is this a terminator for a basic block?
10617 if (MI.isTerminator())
10618 // TargetInstrInfo::getOutliningType has already filtered out anything
10619 // that would break this, so we can allow it here.
10620 return outliner::InstrType::Legal;
10621
10622 // Make sure none of the operands are un-outlinable.
10623 for (const MachineOperand &MOP : MI.operands()) {
10624 // A check preventing CFI indices was here before, but only CFI
10625 // instructions should have those.
10626 assert(!MOP.isCFIIndex());
10627
10628 // If it uses LR or W30 explicitly, then don't touch it.
10629 if (MOP.isReg() && !MOP.isImplicit() &&
10630 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10631 return outliner::InstrType::Illegal;
10632 }
10633
10634 // Special cases for instructions that can always be outlined, but will fail
10635 // the later tests. E.g. ADRPs are PC-relative, yet can always be outlined
10636 // because they don't require a *specific* value to be in LR.
10637 if (MI.getOpcode() == AArch64::ADRP)
10638 return outliner::InstrType::Legal;
10639
10640 // If MI is a call we might be able to outline it. We don't want to outline
10641 // any calls that rely on the position of items on the stack. When we outline
10642 // something containing a call, we have to emit a save and restore of LR in
10643 // the outlined function. Currently, this always happens by saving LR to the
10644 // stack. Thus, if we outline, say, half the parameters for a function call
10645 // plus the call, then we'll break the callee's expectations for the layout
10646 // of the stack.
10647 //
10648 // FIXME: Allow calls to functions which construct a stack frame, as long
10649 // as they don't access arguments on the stack.
10650 // FIXME: Figure out some way to analyze functions defined in other modules.
10651 // We should be able to compute the memory usage based on the IR calling
10652 // convention, even if we can't see the definition.
10653 if (MI.isCall()) {
10654 // Get the function associated with the call. Look at each operand and find
10655 // the one that represents the callee and get its name.
10656 const Function *Callee = nullptr;
10657 for (const MachineOperand &MOP : MI.operands()) {
10658 if (MOP.isGlobal()) {
10659 Callee = dyn_cast<Function>(MOP.getGlobal());
10660 break;
10661 }
10662 }
10663
10664 // Never outline calls to mcount. There isn't any rule that would require
10665 // this, but the Linux kernel's "ftrace" feature depends on it.
10666 if (Callee && Callee->getName() == "\01_mcount")
10667 return outliner::InstrType::Illegal;
10668
10669 // If we don't know anything about the callee, assume it depends on the
10670 // stack layout of the caller. In that case, it's only legal to outline
10671 // as a tail-call. Explicitly list the call instructions we know about so we
10672 // don't get unexpected results with call pseudo-instructions.
10673 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10674 if (MI.getOpcode() == AArch64::BLR ||
10675 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10676 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10677
10678 if (!Callee)
10679 return UnknownCallOutlineType;
10680
10681 // We have a function we have information about. Check whether it's
10682 // something we can safely outline.
10683 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10684
10685 // We don't know what's going on with the callee at all. Don't touch it.
10686 if (!CalleeMF)
10687 return UnknownCallOutlineType;
10688
10689 // Check if we know anything about the callee saves on the function. If we
10690 // don't, then don't touch it, since that implies that we haven't
10691 // computed anything about its stack frame yet.
10692 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10693 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10694 MFI.getNumObjects() > 0)
10695 return UnknownCallOutlineType;
10696
10697 // At this point, we can say that CalleeMF ought to not pass anything on the
10698 // stack. Therefore, we can outline it.
10699 return outliner::InstrType::Legal;
10700 }
10701
10702 // Don't touch the link register or W30.
10703 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10704 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10705 return outliner::InstrType::Illegal;
10706
10707 // Don't outline BTI instructions, because that will prevent the outlining
10708 // site from being indirectly callable.
10709 if (hasBTISemantics(MI))
10710 return outliner::InstrType::Illegal;
10711
10712 return outliner::InstrType::Legal;
10713}
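// --- Editor's note (illustrative, not part of the original source) ---
// A few sample classifications implied by the checks above, assuming the
// surrounding conditions hold:
//   ADRP x0, sym                          -> Legal (no specific LR value needed)
//   PACIASP, RETAA, PAUTH_PROLOGUE, ...   -> Illegal (return-address signing)
//   BL to a known callee with an empty stack frame -> Legal
//   BL/BLR to an unknown callee           -> LegalTerminator (tail call only)
//   Explicit (non-implicit) use of LR/W30 -> Illegal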
10714
10715void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10716 for (MachineInstr &MI : MBB) {
10717 const MachineOperand *Base;
10718 TypeSize Width(0, false);
10719 int64_t Offset;
10720 bool OffsetIsScalable;
10721
10722 // Is this a load or store with an immediate offset with SP as the base?
10723 if (!MI.mayLoadOrStore() ||
10724 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10725 &RI) ||
10726 (Base->isReg() && Base->getReg() != AArch64::SP))
10727 continue;
10728
10729 // It is, so we have to fix it up.
10730 TypeSize Scale(0U, false);
10731 int64_t Dummy1, Dummy2;
10732
10733 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10734 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10735 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10736 assert(Scale != 0 && "Unexpected opcode!");
10737 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10738
10739 // We've pushed the return address to the stack, so add 16 to the offset.
10740 // This is safe, since we already checked if it would overflow when we
10741 // checked if this instruction was legal to outline.
10742 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10743 StackOffsetOperand.setImm(NewImm);
10744 }
10745}
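// --- Editor's note (worked example, not part of the original source) ---
// Suppose an outlined range contains "ldr x0, [sp, #8]". For LDRXui the scale
// is 8, so Offset == 8 and NewImm == (8 + 16) / 8 == 3; once the 16-byte LR
// spill sits below it on the stack, the instruction effectively reads [sp, #24].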
10746
10747static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10748 const AArch64InstrInfo *TII,
10749 bool ShouldSignReturnAddr) {
10750 if (!ShouldSignReturnAddr)
10751 return;
10752
10753 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10754 .setMIFlag(MachineInstr::FrameSetup);
10755 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10756 TII->get(AArch64::PAUTH_EPILOGUE))
10757 .setMIFlag(MachineInstr::FrameDestroy);
10758}
10759
10760void AArch64InstrInfo::buildOutlinedFrame(
10761 MachineBasicBlock &MBB, MachineFunction &MF,
10762 const outliner::OutlinedFunction &OF) const {
10763
10764 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10765
10766 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10767 FI->setOutliningStyle("Tail Call");
10768 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10769 // For thunk outlining, rewrite the last instruction from a call to a
10770 // tail-call.
10771 MachineInstr *Call = &*--MBB.instr_end();
10772 unsigned TailOpcode;
10773 if (Call->getOpcode() == AArch64::BL) {
10774 TailOpcode = AArch64::TCRETURNdi;
10775 } else {
10776 assert(Call->getOpcode() == AArch64::BLR ||
10777 Call->getOpcode() == AArch64::BLRNoIP);
10778 TailOpcode = AArch64::TCRETURNriALL;
10779 }
10780 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10781 .add(Call->getOperand(0))
10782 .addImm(0);
10783 MBB.insert(MBB.end(), TC);
10784 Call->eraseFromParent();
10785
10786 FI->setOutliningStyle("Thunk");
10787 }
10788
10789 bool IsLeafFunction = true;
10790
10791 // Is there a call in the outlined range?
10792 auto IsNonTailCall = [](const MachineInstr &MI) {
10793 return MI.isCall() && !MI.isReturn();
10794 };
10795
10796 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10797 // Fix up the instructions in the range, since we're going to modify the
10798 // stack.
10799
10800 // Bugzilla ID: 46767
10801 // TODO: Check if fixing up twice is safe so we can outline these.
10802 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10803 "Can only fix up stack references once");
10804 fixupPostOutline(MBB);
10805
10806 IsLeafFunction = false;
10807
10808 // LR has to be a live in so that we can save it.
10809 if (!MBB.isLiveIn(AArch64::LR))
10810 MBB.addLiveIn(AArch64::LR);
10811
10812 MachineBasicBlock::iterator It = MBB.begin();
10813 MachineBasicBlock::iterator Et = MBB.end();
10814
10815 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10816 OF.FrameConstructionID == MachineOutlinerThunk)
10817 Et = std::prev(MBB.end());
10818
10819 // Insert a save before the outlined region
10820 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10821 .addReg(AArch64::SP, RegState::Define)
10822 .addReg(AArch64::LR)
10823 .addReg(AArch64::SP)
10824 .addImm(-16);
10825 It = MBB.insert(It, STRXpre);
10826
10827 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10828 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10829
10830 // Add a CFI saying the stack was moved 16 B down.
10831 CFIBuilder.buildDefCFAOffset(16);
10832
10833 // Add a CFI saying that the LR that we want to find is now 16 B higher
10834 // than before.
10835 CFIBuilder.buildOffset(AArch64::LR, -16);
10836 }
10837
10838 // Insert a restore before the terminator for the function.
10839 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10840 .addReg(AArch64::SP, RegState::Define)
10841 .addReg(AArch64::LR, RegState::Define)
10842 .addReg(AArch64::SP)
10843 .addImm(16);
10844 Et = MBB.insert(Et, LDRXpost);
10845 }
10846
10847 auto RASignCondition = FI->getSignReturnAddressCondition();
10848 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
10849 RASignCondition, !IsLeafFunction);
10850
10851 // If this is a tail call outlined function, then there's already a return.
10852 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10853 OF.FrameConstructionID == MachineOutlinerThunk) {
10854 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10855 return;
10856 }
10857
10858 // It's not a tail call, so we have to insert the return ourselves.
10859
10860 // LR has to be a live in so that we can return to it.
10861 if (!MBB.isLiveIn(AArch64::LR))
10862 MBB.addLiveIn(AArch64::LR);
10863
10864 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10865 .addReg(AArch64::LR);
10866 MBB.insert(MBB.end(), ret);
10867
10868 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10869
10870 FI->setOutliningStyle("Function");
10871
10872 // Did we have to modify the stack by saving the link register?
10873 if (OF.FrameConstructionID != MachineOutlinerDefault)
10874 return;
10875
10876 // We modified the stack.
10877 // Walk over the basic block and fix up all the stack accesses.
10878 fixupPostOutline(MBB);
10879}
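// --- Editor's note (illustrative, not part of the original source) ---
// For a non-tail-call outlined function that itself contains a call, the frame
// built above looks roughly like:
//
//   str x30, [sp, #-16]!    ; STRXpre: spill LR, CFA offset becomes 16
//   ...outlined body...
//   ldr x30, [sp], #16      ; LDRXpost: reload LR
//   ret
//
// with PAUTH_PROLOGUE/PAUTH_EPILOGUE added by signOutlinedFunction() when
// return-address signing is required, and CFI emitted only if the function
// needs DWARF unwind info.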
10880
10881MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10882 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10883 MachineFunction &MF, outliner::Candidate &C) const {
10884
10885 // Are we tail calling?
10886 if (C.CallConstructionID == MachineOutlinerTailCall) {
10887 // If yes, then we can just branch to the label.
10888 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10889 .addGlobalAddress(M.getNamedValue(MF.getName()))
10890 .addImm(0));
10891 return It;
10892 }
10893
10894 // Are we saving the link register?
10895 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10896 C.CallConstructionID == MachineOutlinerThunk) {
10897 // No, so just insert the call.
10898 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10899 .addGlobalAddress(M.getNamedValue(MF.getName())));
10900 return It;
10901 }
10902
10903 // We want to return the spot where we inserted the call.
10904 MachineBasicBlock::iterator CallPt;
10905
10906 // Instructions for saving and restoring LR around the call instruction we're
10907 // going to insert.
10908 MachineInstr *Save;
10909 MachineInstr *Restore;
10910 // Can we save to a register?
10911 if (C.CallConstructionID == MachineOutlinerRegSave) {
10912 // FIXME: This logic should be sunk into a target-specific interface so that
10913 // we don't have to recompute the register.
10914 Register Reg = findRegisterToSaveLRTo(C);
10915 assert(Reg && "No callee-saved register available?");
10916
10917 // LR has to be a live in so that we can save it.
10918 if (!MBB.isLiveIn(AArch64::LR))
10919 MBB.addLiveIn(AArch64::LR);
10920
10921 // Save and restore LR from Reg.
10922 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10923 .addReg(AArch64::XZR)
10924 .addReg(AArch64::LR)
10925 .addImm(0);
10926 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10927 .addReg(AArch64::XZR)
10928 .addReg(Reg)
10929 .addImm(0);
10930 } else {
10931 // We have the default case. Save and restore from SP.
10932 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10933 .addReg(AArch64::SP, RegState::Define)
10934 .addReg(AArch64::LR)
10935 .addReg(AArch64::SP)
10936 .addImm(-16);
10937 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10938 .addReg(AArch64::SP, RegState::Define)
10939 .addReg(AArch64::LR, RegState::Define)
10940 .addReg(AArch64::SP)
10941 .addImm(16);
10942 }
10943
10944 It = MBB.insert(It, Save);
10945 It++;
10946
10947 // Insert the call.
10948 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10949 .addGlobalAddress(M.getNamedValue(MF.getName())));
10950 CallPt = It;
10951 It++;
10952
10953 It = MBB.insert(It, Restore);
10954 return CallPt;
10955}
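// --- Editor's note (illustrative, not part of the original source) ---
// The call constructions above correspond roughly to:
//   MachineOutlinerTailCall:          b   OUTLINED_FUNCTION_N
//   MachineOutlinerNoLRSave / Thunk:  bl  OUTLINED_FUNCTION_N
//   MachineOutlinerRegSave:           mov x<reg>, x30 ; bl ... ; mov x30, x<reg>
//   MachineOutlinerDefault:           str x30, [sp, #-16]! ; bl ... ; ldr x30, [sp], #16
// where the register moves are emitted as ORRXrs with XZR, the canonical MOV.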
10956
10957bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10958 MachineFunction &MF) const {
10959 return MF.getFunction().hasMinSize();
10960}
10961
10962void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10963 MachineBasicBlock::iterator Iter,
10964 DebugLoc &DL,
10965 bool AllowSideEffects) const {
10966 const MachineFunction &MF = *MBB.getParent();
10967 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10968 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10969
10970 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10971 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10972 } else if (STI.isSVEorStreamingSVEAvailable()) {
10973 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10974 .addImm(0)
10975 .addImm(0);
10976 } else if (STI.isNeonAvailable()) {
10977 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10978 .addImm(0);
10979 } else {
10980 // This is a streaming-compatible function without SVE. We don't have full
10981 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10982 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10983 assert(STI.hasNEON() && "Expected to have NEON.");
10984 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10985 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10986 }
10987}
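// --- Editor's note (illustrative, not part of the original source) ---
// Depending on the subtarget, clearing a register picks, in order:
//   GPR:                             movz xN, #0
//   SVE / streaming SVE available:   dup  zN.d, #0
//   NEON available:                  movi vN.2d, #0
//   streaming-compatible, FPR-only:  clear only the low 64 bits via FMOVD0
// The concrete assembly shown is an approximation of what the selected
// instructions print as; only the selection order comes from the code above.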
10988
10989std::optional<DestSourcePair>
10990AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10991
10992 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10993 // and zero immediate operands used as an alias for mov instruction.
10994 if (((MI.getOpcode() == AArch64::ORRWrs &&
10995 MI.getOperand(1).getReg() == AArch64::WZR &&
10996 MI.getOperand(3).getImm() == 0x0) ||
10997 (MI.getOpcode() == AArch64::ORRWrr &&
10998 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10999 // Check that the w->w move is not a zero-extending w->x mov.
11000 (!MI.getOperand(0).getReg().isVirtual() ||
11001 MI.getOperand(0).getSubReg() == 0) &&
11002 (!MI.getOperand(0).getReg().isPhysical() ||
11003 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
11004 /*TRI=*/nullptr) == -1))
11005 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11006
11007 if (MI.getOpcode() == AArch64::ORRXrs &&
11008 MI.getOperand(1).getReg() == AArch64::XZR &&
11009 MI.getOperand(3).getImm() == 0x0)
11010 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11011
11012 return std::nullopt;
11013}
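// --- Editor's note (illustrative, not part of the original source) ---
// The ORR forms accepted above are the standard MOV aliases, e.g.
//   orr w0, wzr, w1, lsl #0   (ORRWrs)  ==  mov w0, w1
//   orr x2, xzr, x3, lsl #0   (ORRXrs)  ==  mov x2, x3
// The extra checks on the W form reject cases where the 32-bit write is really
// being used as an implicit zero-extension into the X register.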
11014
11015std::optional<DestSourcePair>
11016AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
11017 if ((MI.getOpcode() == AArch64::ORRWrs &&
11018 MI.getOperand(1).getReg() == AArch64::WZR &&
11019 MI.getOperand(3).getImm() == 0x0) ||
11020 (MI.getOpcode() == AArch64::ORRWrr &&
11021 MI.getOperand(1).getReg() == AArch64::WZR))
11022 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
11023 return std::nullopt;
11024}
11025
11026std::optional<RegImmPair>
11027AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
11028 int Sign = 1;
11029 int64_t Offset = 0;
11030
11031 // TODO: Handle cases where Reg is a super- or sub-register of the
11032 // destination register.
11033 const MachineOperand &Op0 = MI.getOperand(0);
11034 if (!Op0.isReg() || Reg != Op0.getReg())
11035 return std::nullopt;
11036
11037 switch (MI.getOpcode()) {
11038 default:
11039 return std::nullopt;
11040 case AArch64::SUBWri:
11041 case AArch64::SUBXri:
11042 case AArch64::SUBSWri:
11043 case AArch64::SUBSXri:
11044 Sign *= -1;
11045 [[fallthrough]];
11046 case AArch64::ADDSWri:
11047 case AArch64::ADDSXri:
11048 case AArch64::ADDWri:
11049 case AArch64::ADDXri: {
11050 // TODO: Third operand can be global address (usually some string).
11051 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
11052 !MI.getOperand(2).isImm())
11053 return std::nullopt;
11054 int Shift = MI.getOperand(3).getImm();
11055 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
11056 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
11057 }
11058 }
11059 return RegImmPair{MI.getOperand(1).getReg(), Offset};
11060}
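// --- Editor's note (worked example, not part of the original source) ---
//   add x0, x1, #4, lsl #12  ->  RegImmPair{x1, +16384}   (4 << 12)
//   sub x0, x1, #16          ->  RegImmPair{x1, -16}
// i.e. Offset == Sign * (Imm << Shift), with Shift restricted to 0 or 12.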
11061
11062/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
11063/// the destination register then, if possible, describe the value in terms of
11064/// the source register.
11065static std::optional<ParamLoadedValue>
11066describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
11067 const TargetInstrInfo *TII,
11068 const TargetRegisterInfo *TRI) {
11069 auto DestSrc = TII->isCopyLikeInstr(MI);
11070 if (!DestSrc)
11071 return std::nullopt;
11072
11073 Register DestReg = DestSrc->Destination->getReg();
11074 Register SrcReg = DestSrc->Source->getReg();
11075
11076 if (!DestReg.isValid() || !SrcReg.isValid())
11077 return std::nullopt;
11078
11079 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
11080
11081 // If the described register is the destination, just return the source.
11082 if (DestReg == DescribedReg)
11083 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11084
11085 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
11086 if (MI.getOpcode() == AArch64::ORRWrs &&
11087 TRI->isSuperRegister(DestReg, DescribedReg))
11088 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
11089
11090 // We may need to describe the lower part of a ORRXrs move.
11091 if (MI.getOpcode() == AArch64::ORRXrs &&
11092 TRI->isSubRegister(DestReg, DescribedReg)) {
11093 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
11094 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
11095 }
11096
11097 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
11098 "Unhandled ORR[XW]rs copy case");
11099
11100 return std::nullopt;
11101}
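// --- Editor's note (illustrative, not part of the original source) ---
// Example: if a parameter is set up with "mov w0, w19" (ORRWrs w0, wzr, w19)
// and a debug-info query asks for the 64-bit x0, the ORRWrs case above answers
// with w19, because the 32-bit move zero-extends into the whole X register.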
11102
11103bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
11104 // Functions cannot be split to different sections on AArch64 if they have
11105 // a red zone. This is because relaxing a cross-section branch may require
11106 // incrementing the stack pointer to spill a register, which would overwrite
11107 // the red zone.
11108 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
11109 return false;
11110
11111 return TargetInstrInfo::isFunctionSafeToSplit(MF);
11112}
11113
11114bool AArch64InstrInfo::isMBBSafeToSplitToCold(
11115 const MachineBasicBlock &MBB) const {
11116 // Asm Goto blocks can contain conditional branches to goto labels, which can
11117 // get moved out of range of the branch instruction.
11118 auto isAsmGoto = [](const MachineInstr &MI) {
11119 return MI.getOpcode() == AArch64::INLINEASM_BR;
11120 };
11121 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
11122 return false;
11123
11124 // Because jump tables are label-relative instead of table-relative, they all
11125 // must be in the same section or relocation fixup handling will fail.
11126
11127 // Check if MBB is a jump table target
11128 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
11129 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
11130 return llvm::is_contained(JTE.MBBs, &MBB);
11131 };
11132 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
11133 return false;
11134
11135 // Check if MBB contains a jump table lookup
11136 for (const MachineInstr &MI : MBB) {
11137 switch (MI.getOpcode()) {
11138 case TargetOpcode::G_BRJT:
11139 case AArch64::JumpTableDest32:
11140 case AArch64::JumpTableDest16:
11141 case AArch64::JumpTableDest8:
11142 return false;
11143 default:
11144 continue;
11145 }
11146 }
11147
11148 // MBB isn't a special case, so it's safe to be split to the cold section.
11149 return true;
11150}
11151
11152std::optional<ParamLoadedValue>
11153AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11154 Register Reg) const {
11155 const MachineFunction *MF = MI.getMF();
11156 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11157 switch (MI.getOpcode()) {
11158 case AArch64::MOVZWi:
11159 case AArch64::MOVZXi: {
11160 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11161 // 64-bit parameters, so we need to consider super-registers.
11162 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11163 return std::nullopt;
11164
11165 if (!MI.getOperand(1).isImm())
11166 return std::nullopt;
11167 int64_t Immediate = MI.getOperand(1).getImm();
11168 int Shift = MI.getOperand(2).getImm();
11169 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11170 nullptr);
11171 }
11172 case AArch64::ORRWrs:
11173 case AArch64::ORRXrs:
11174 return describeORRLoadedValue(MI, Reg, this, TRI);
11175 }
11176
11177 return TargetInstrInfo::describeLoadedValue(MI, Reg);
11178}
11179
11180bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11181 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11182 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11183 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11184 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11185
11186 // Anyexts are nops.
11187 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11188 return true;
11189
11190 Register DefReg = ExtMI.getOperand(0).getReg();
11191 if (!MRI.hasOneNonDBGUse(DefReg))
11192 return false;
11193
11194 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11195 // addressing mode.
11196 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11197 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11198}
11199
11200uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11201 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11202}
11203
11204bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11205 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11206}
11207
11208bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11209 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11210}
11211
11212unsigned int
11213AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11214 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11215}
11216
11217bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11218 unsigned Scale) const {
11219 if (Offset && Scale)
11220 return false;
11221
11222 // Check Reg + Imm
11223 if (!Scale) {
11224 // 9-bit signed offset
11225 if (isInt<9>(Offset))
11226 return true;
11227
11228 // 12-bit unsigned offset
11229 unsigned Shift = Log2_64(NumBytes);
11230 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11231 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11232 (Offset >> Shift) << Shift == Offset)
11233 return true;
11234 return false;
11235 }
11236
11237 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11238 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11239}
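// --- Editor's note (worked examples, not part of the original source) ---
// For an 8-byte access (NumBytes == 8):
//   Scale == 0, Offset == -256  -> legal (signed 9-bit unscaled form)
//   Scale == 0, Offset == 12    -> legal (fits the 9-bit form even though it
//                                  is not a multiple of 8)
//   Scale == 0, Offset == 32760 -> legal (32760 / 8 == 4095, scaled u12 form)
//   Scale == 8, Offset == 0     -> legal (reg + 8 * reg)
//   Scale == 4, Offset == 0     -> not legal for an 8-byte access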
11240
11241unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
11242 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11243 return AArch64::BLRNoIP;
11244 else
11245 return AArch64::BLR;
11246}
11247
11248MachineBasicBlock::iterator
11249AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
11250 Register TargetReg, bool FrameSetup) const {
11251 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11252
11253 MachineBasicBlock &MBB = *MBBI->getParent();
11254 MachineFunction &MF = *MBB.getParent();
11255 const AArch64InstrInfo *TII =
11256 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11257 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11258 DebugLoc DL = MBB.findDebugLoc(MBBI);
11259
11260 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11261 MachineBasicBlock *LoopTestMBB =
11262 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11263 MF.insert(MBBInsertPoint, LoopTestMBB);
11264 MachineBasicBlock *LoopBodyMBB =
11265 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11266 MF.insert(MBBInsertPoint, LoopBodyMBB);
11267 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11268 MF.insert(MBBInsertPoint, ExitMBB);
11269 MachineInstr::MIFlag Flags =
11270 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
11271
11272 // LoopTest:
11273 // SUB SP, SP, #ProbeSize
11274 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11275 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11276
11277 // CMP SP, TargetReg
11278 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11279 AArch64::XZR)
11280 .addReg(AArch64::SP)
11281 .addReg(TargetReg)
11282 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
11283 .setMIFlags(Flags);
11284
11285 // B.<Cond> LoopExit
11286 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
11287 .addImm(AArch64CC::LE)
11288 .addMBB(ExitMBB)
11289 .setMIFlags(Flags);
11290
11291 // LDR XZR, [SP]
11292 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::LDRXui))
11293 .addDef(AArch64::XZR)
11294 .addReg(AArch64::SP)
11295 .addImm(0)
11299 Align(8)))
11300 .setMIFlags(Flags);
11301
11302 // B loop
11303 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11304 .addMBB(LoopTestMBB)
11305 .setMIFlags(Flags);
11306
11307 // LoopExit:
11308 // MOV SP, TargetReg
11309 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11310 .addReg(TargetReg)
11311 .addImm(0)
11312 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
11313 .setMIFlags(Flags);
11314
11315 // LDR XZR, [SP]
11316 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11317 .addReg(AArch64::XZR, RegState::Define)
11318 .addReg(AArch64::SP)
11319 .addImm(0)
11320 .setMIFlags(Flags);
11321
11322 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
11323 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
11324
11325 LoopTestMBB->addSuccessor(ExitMBB);
11326 LoopTestMBB->addSuccessor(LoopBodyMBB);
11327 LoopBodyMBB->addSuccessor(LoopTestMBB);
11328 MBB.addSuccessor(LoopTestMBB);
11329
11330 // Update liveins.
11331 if (MF.getRegInfo().reservedRegsFrozen())
11332 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11333
11334 return ExitMBB->begin();
11335}
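// --- Editor's note (illustrative, not part of the original source) ---
// The emitted probing sequence is essentially:
//
//   LoopTest: sub  sp, sp, #ProbeSize
//             cmp  sp, <TargetReg>
//             b.<cc> Exit              ; leave once SP has reached TargetReg
//   LoopBody: ldr  xzr, [sp]           ; probe the newly exposed page
//             b    LoopTest
//   Exit:     mov  sp, <TargetReg>
//             ldr  xzr, [sp]
//
// so SP never moves more than ProbeSize bytes past the last probed address.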
11336
11337namespace {
11338class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
11339 MachineFunction *MF;
11340 const TargetInstrInfo *TII;
11341 const TargetRegisterInfo *TRI;
11342 MachineRegisterInfo &MRI;
11343
11344 /// The block of the loop
11345 MachineBasicBlock *LoopBB;
11346 /// The conditional branch of the loop
11347 MachineInstr *CondBranch;
11348 /// The compare instruction for loop control
11349 MachineInstr *Comp;
11350 /// The number of the operand of the loop counter value in Comp
11351 unsigned CompCounterOprNum;
11352 /// The instruction that updates the loop counter value
11353 MachineInstr *Update;
11354 /// The number of the operand of the loop counter value in Update
11355 unsigned UpdateCounterOprNum;
11356 /// The initial value of the loop counter
11357 Register Init;
11358 /// True iff Update is a predecessor of Comp
11359 bool IsUpdatePriorComp;
11360
11361 /// The normalized condition used by createTripCountGreaterCondition()
11362 SmallVector<MachineOperand, 4> Cond;
11363
11364public:
11365 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11366 MachineInstr *Comp, unsigned CompCounterOprNum,
11367 MachineInstr *Update, unsigned UpdateCounterOprNum,
11368 Register Init, bool IsUpdatePriorComp,
11369 const SmallVectorImpl<MachineOperand> &Cond)
11370 : MF(Comp->getParent()->getParent()),
11371 TII(MF->getSubtarget().getInstrInfo()),
11372 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11373 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11374 CompCounterOprNum(CompCounterOprNum), Update(Update),
11375 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11376 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11377
11378 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11379 // Make the instructions for loop control be placed in stage 0.
11380 // The predecessors of Comp are considered by the caller.
11381 return MI == Comp;
11382 }
11383
11384 std::optional<bool> createTripCountGreaterCondition(
11385 int TC, MachineBasicBlock &MBB,
11386 SmallVectorImpl<MachineOperand> &CondParam) override {
11387 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11388 // Cond is normalized for such use.
11389 // The predecessors of the branch are assumed to have already been inserted.
11390 CondParam = Cond;
11391 return {};
11392 }
11393
11394 void createRemainingIterationsGreaterCondition(
11395 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11396 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11397
11398 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11399
11400 void adjustTripCount(int TripCountAdjust) override {}
11401
11402 bool isMVEExpanderSupported() override { return true; }
11403};
11404} // namespace
11405
11406/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11407/// is replaced by ReplaceReg. The output register is newly created.
11408/// The other operands are unchanged from MI.
11409static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11410 Register ReplaceReg, MachineBasicBlock &MBB,
11411 MachineBasicBlock::iterator InsertTo) {
11412 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11413 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11414 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11415 Register Result = 0;
11416 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11417 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11418 Result = MRI.createVirtualRegister(
11419 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11420 NewMI->getOperand(I).setReg(Result);
11421 } else if (I == ReplaceOprNum) {
11422 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11423 NewMI->getOperand(I).setReg(ReplaceReg);
11424 }
11425 }
11426 MBB.insert(InsertTo, NewMI);
11427 return Result;
11428}
11429
11430void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11431 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11432 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11433 // Create and accumulate conditions for next TC iterations.
11434 // Example:
11435 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11436 // # iteration of the kernel
11437 //
11438 // # insert the following instructions
11439 // cond = CSINCXr 0, 0, C, implicit $nzcv
11440 // counter = ADDXri counter, 1 # clone from this->Update
11441 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11442 // cond = CSINCXr cond, cond, C, implicit $nzcv
11443 // ... (repeat TC times)
11444 // SUBSXri cond, 0, implicit-def $nzcv
11445
11446 assert(CondBranch->getOpcode() == AArch64::Bcc);
11447 // CondCode to exit the loop
11448 AArch64CC::CondCode CC =
11449 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11450 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11451 CC = AArch64CC::getInvertedCondCode(CC);
11452
11453 // Accumulate conditions to exit the loop
11454 Register AccCond = AArch64::XZR;
11455
11456 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11457 auto AccumulateCond = [&](Register CurCond,
11458 AArch64CC::CondCode CC) -> Register {
11459 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11460 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11461 .addReg(NewCond, RegState::Define)
11462 .addReg(CurCond)
11463 .addReg(CurCond)
11464 .addImm(AArch64CC::getInvertedCondCode(CC));
11465 return NewCond;
11466 };
11467
11468 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11469 // Update and Comp for I == 0 already exist in MBB
11470 // (MBB is an unrolled kernel)
11471 Register Counter;
11472 for (int I = 0; I <= TC; ++I) {
11473 Register NextCounter;
11474 if (I != 0)
11475 NextCounter =
11476 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11477
11478 AccCond = AccumulateCond(AccCond, CC);
11479
11480 if (I != TC) {
11481 if (I == 0) {
11482 if (Update != Comp && IsUpdatePriorComp) {
11483 Counter =
11484 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11485 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11486 MBB.end());
11487 } else {
11488 // can use already calculated value
11489 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11490 }
11491 } else if (Update != Comp) {
11492 NextCounter =
11493 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11494 }
11495 }
11496 Counter = NextCounter;
11497 }
11498 } else {
11499 Register Counter;
11500 if (LastStage0Insts.empty()) {
11501 // Use the initial counter value (testing whether the trip count is
11502 // sufficient for the pipelined code to be executed).
11503 Counter = Init;
11504 if (IsUpdatePriorComp)
11505 Counter =
11506 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11507 } else {
11508 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11509 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11510 }
11511
11512 for (int I = 0; I <= TC; ++I) {
11513 Register NextCounter;
11514 NextCounter =
11515 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11516 AccCond = AccumulateCond(AccCond, CC);
11517 if (I != TC && Update != Comp)
11518 NextCounter =
11519 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11520 Counter = NextCounter;
11521 }
11522 }
11523
11524 // If AccCond == 0, the remainder is greater than TC.
11525 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11526 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11527 .addReg(AccCond)
11528 .addImm(0)
11529 .addImm(0);
11530 Cond.clear();
11531 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11532}
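// --- Editor's note (illustrative, not part of the original source) ---
// The accumulation relies on CSINC semantics: "csinc xd, xn, xm, cc" yields xn
// when cc holds and xm + 1 otherwise, so selecting the inverted condition makes
// the accumulator increment exactly when the loop-exit condition holds. The
// final SUBS against zero lets the caller test whether none of the TC probed
// iterations would have exited (AccCond == 0), i.e. whether the remainder is
// still greater than TC.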
11533
11534static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11535 Register &RegMBB, Register &RegOther) {
11536 assert(Phi.getNumOperands() == 5);
11537 if (Phi.getOperand(2).getMBB() == MBB) {
11538 RegMBB = Phi.getOperand(1).getReg();
11539 RegOther = Phi.getOperand(3).getReg();
11540 } else {
11541 assert(Phi.getOperand(4).getMBB() == MBB);
11542 RegMBB = Phi.getOperand(3).getReg();
11543 RegOther = Phi.getOperand(1).getReg();
11544 }
11545}
11546
11547static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11548 if (!Reg.isVirtual())
11549 return false;
11550 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11551 return MRI.getVRegDef(Reg)->getParent() != BB;
11552}
11553
11554/// If Reg is an induction variable, return true and set some parameters
11555static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11556 MachineInstr *&UpdateInst,
11557 unsigned &UpdateCounterOprNum, Register &InitReg,
11558 bool &IsUpdatePriorComp) {
11559 // Example:
11560 //
11561 // Preheader:
11562 // InitReg = ...
11563 // LoopBB:
11564 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11565 // Reg = COPY Reg0 ; COPY is ignored.
11566 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11567 // ; Reg is the value calculated in the previous
11568 // ; iteration, so IsUpdatePriorComp == false.
11569
11570 if (LoopBB->pred_size() != 2)
11571 return false;
11572 if (!Reg.isVirtual())
11573 return false;
11574 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11575 UpdateInst = nullptr;
11576 UpdateCounterOprNum = 0;
11577 InitReg = 0;
11578 IsUpdatePriorComp = true;
11579 Register CurReg = Reg;
11580 while (true) {
11581 MachineInstr *Def = MRI.getVRegDef(CurReg);
11582 if (Def->getParent() != LoopBB)
11583 return false;
11584 if (Def->isCopy()) {
11585 // Ignore copy instructions unless they contain subregisters
11586 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11587 return false;
11588 CurReg = Def->getOperand(1).getReg();
11589 } else if (Def->isPHI()) {
11590 if (InitReg != 0)
11591 return false;
11592 if (!UpdateInst)
11593 IsUpdatePriorComp = false;
11594 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11595 } else {
11596 if (UpdateInst)
11597 return false;
11598 switch (Def->getOpcode()) {
11599 case AArch64::ADDSXri:
11600 case AArch64::ADDSWri:
11601 case AArch64::SUBSXri:
11602 case AArch64::SUBSWri:
11603 case AArch64::ADDXri:
11604 case AArch64::ADDWri:
11605 case AArch64::SUBXri:
11606 case AArch64::SUBWri:
11607 UpdateInst = Def;
11608 UpdateCounterOprNum = 1;
11609 break;
11610 case AArch64::ADDSXrr:
11611 case AArch64::ADDSWrr:
11612 case AArch64::SUBSXrr:
11613 case AArch64::SUBSWrr:
11614 case AArch64::ADDXrr:
11615 case AArch64::ADDWrr:
11616 case AArch64::SUBXrr:
11617 case AArch64::SUBWrr:
11618 UpdateInst = Def;
11619 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11620 UpdateCounterOprNum = 1;
11621 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11622 UpdateCounterOprNum = 2;
11623 else
11624 return false;
11625 break;
11626 default:
11627 return false;
11628 }
11629 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11630 }
11631
11632 if (!CurReg.isVirtual())
11633 return false;
11634 if (Reg == CurReg)
11635 break;
11636 }
11637
11638 if (!UpdateInst)
11639 return false;
11640
11641 return true;
11642}
11643
11644std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11645AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11646 // Accept loops that meet the following conditions
11647 // * The conditional branch is BCC
11648 // * The compare instruction is ADDS/SUBS/WHILEXX
11649 // * One operand of the compare is an induction variable and the other is a
11650 // loop invariant value
11651 // * The induction variable is incremented/decremented by a single instruction
11652 // * Does not contain CALL or instructions which have unmodeled side effects
11653
11654 for (MachineInstr &MI : *LoopBB)
11655 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11656 // This instruction may use NZCV, which interferes with the instruction to
11657 // be inserted for loop control.
11658 return nullptr;
11659
11660 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11661 SmallVector<MachineOperand, 4> Cond;
11662 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11663 return nullptr;
11664
11665 // Infinite loops are not supported
11666 if (TBB == LoopBB && FBB == LoopBB)
11667 return nullptr;
11668
11669 // Must be conditional branch
11670 if (TBB != LoopBB && FBB == nullptr)
11671 return nullptr;
11672
11673 assert((TBB == LoopBB || FBB == LoopBB) &&
11674 "The Loop must be a single-basic-block loop");
11675
11676 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11677 const TargetRegisterInfo &TRI = getRegisterInfo();
11678
11679 if (CondBranch->getOpcode() != AArch64::Bcc)
11680 return nullptr;
11681
11682 // Normalization for createTripCountGreaterCondition()
11683 if (TBB == LoopBB)
11684 reverseBranchCondition(Cond);
11685
11686 MachineInstr *Comp = nullptr;
11687 unsigned CompCounterOprNum = 0;
11688 for (MachineInstr &MI : reverse(*LoopBB)) {
11689 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11690 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11691 // operands is a loop invariant value
11692
11693 switch (MI.getOpcode()) {
11694 case AArch64::SUBSXri:
11695 case AArch64::SUBSWri:
11696 case AArch64::ADDSXri:
11697 case AArch64::ADDSWri:
11698 Comp = &MI;
11699 CompCounterOprNum = 1;
11700 break;
11701 case AArch64::ADDSWrr:
11702 case AArch64::ADDSXrr:
11703 case AArch64::SUBSWrr:
11704 case AArch64::SUBSXrr:
11705 Comp = &MI;
11706 break;
11707 default:
11708 if (isWhileOpcode(MI.getOpcode())) {
11709 Comp = &MI;
11710 break;
11711 }
11712 return nullptr;
11713 }
11714
11715 if (CompCounterOprNum == 0) {
11716 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11717 CompCounterOprNum = 2;
11718 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11719 CompCounterOprNum = 1;
11720 else
11721 return nullptr;
11722 }
11723 break;
11724 }
11725 }
11726 if (!Comp)
11727 return nullptr;
11728
11729 MachineInstr *Update = nullptr;
11730 Register Init;
11731 bool IsUpdatePriorComp;
11732 unsigned UpdateCounterOprNum;
11733 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11734 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11735 return nullptr;
11736
11737 return std::make_unique<AArch64PipelinerLoopInfo>(
11738 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11739 Init, IsUpdatePriorComp, Cond);
11740}
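// --- Editor's note (illustrative, not part of the original source) ---
// A typical single-block loop this accepts, in rough pre-RA MIR form:
//   %iv      = PHI [ %init, %preheader ], [ %iv.next, %loop ]
//   ...loop body...
//   %iv.next = ADDXri %iv, 1, 0
//   SUBSXrr %n, %iv.next, implicit-def $nzcv   ; %n is defined outside the loop
//   Bcc ne, %loop
// Here Comp is the SUBSXrr, Update is the ADDXri, Init is %init, and
// IsUpdatePriorComp is true because the compare uses the already-updated
// counter. The operand numbers and ordering are invented for illustration.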
11741
11742/// verifyInstruction - Perform target specific instruction verification.
11743bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11744 StringRef &ErrInfo) const {
11745 // Verify that immediate offsets on load/store instructions are within range.
11746 // Stack objects with an FI operand are excluded as they can be fixed up
11747 // during PEI.
11748 TypeSize Scale(0U, false), Width(0U, false);
11749 int64_t MinOffset, MaxOffset;
11750 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11751 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11752 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11753 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11754 if (Imm < MinOffset || Imm > MaxOffset) {
11755 ErrInfo = "Unexpected immediate on load/store instruction";
11756 return false;
11757 }
11758 }
11759 }
11760
11761 const MCInstrDesc &MCID = MI.getDesc();
11762 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11763 const MachineOperand &MO = MI.getOperand(Op);
11764 switch (MCID.operands()[Op].OperandType) {
11765 case AArch64::OPERAND_IMPLICIT_IMM_0:
11766 if (!MO.isImm() || MO.getImm() != 0) {
11767 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11768 return false;
11769 }
11770 break;
11771 case AArch64::OPERAND_SHIFT_MSL:
11772 if (!MO.isImm() ||
11773 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11774 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11775 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11776 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11777 return false;
11778 }
11779 break;
11780 default:
11781 break;
11782 }
11783 }
11784 return true;
11785}
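// --- Editor's note (illustrative, not part of the original source) ---
// Example of the load/store range check above: LDRXui scales an unsigned
// 12-bit immediate by 8, so getMemOpInfo() reports MinOffset == 0 and
// MaxOffset == 4095; an LDRXui whose immediate operand is 4096 (and whose base
// is not a frame index) would trigger the "Unexpected immediate" error.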
11786
11787#define GET_INSTRINFO_HELPERS
11788#define GET_INSTRMAP_INFO
11789#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static bool mustAvoidNeonAtMBBI(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if in a streaming call site region without SME-FA64.
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool isInStreamingCallSiteRegion(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
Returns true if the instruction at I is in a streaming call site region, within a single basic block.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, RegState State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16-bit operands).
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128-bit operands).
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
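A minimal in-tree sketch of how this query is typically used; the opcode and the expected values in the comment are illustrative assumptions, not taken from this file:
TypeSize Scale = TypeSize::getFixed(0);
TypeSize Width = TypeSize::getFixed(0);
int64_t MinOffset = 0, MaxOffset = 0;
if (AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOffset,
                                   MaxOffset)) {
  // For a scaled 64-bit load like LDRXui one would expect an 8-byte scale and
  // width and an unsigned scaled-immediate range on the order of [0, 4095].
}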
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset operand of the load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
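A minimal sketch of the usual calling pattern, assuming TII and MBB already exist (all names are placeholders):
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
  // Success (the hook returns true when it cannot analyze the block): TBB is
  // the taken destination, FBB the fall-through destination (either may stay
  // null), and Cond holds the condition operands consumed by insertBranch.
}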
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the operand index of the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:123
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:712
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:709
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:576
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:618
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:591
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:688
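A minimal sketch of how such a directive is usually recorded and attached to the instruction stream, assuming MF, MBB, MBBI, DL, and TII already exist (the 16-byte offset is illustrative):
unsigned CFIIndex =
    MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, 16));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    .addCFIIndex(CFIIndex)
    .setMIFlags(MachineInstr::FrameSetup);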
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
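A minimal sketch, assuming MF and FrameIndex already exist; the 64-bit scalar type and 8-byte alignment are illustrative:
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FrameIndex),
    MachineMemOperand::MOLoad, LLT::scalar(64), Align(8));
// The new MMO is then attached to the memory instruction, e.g. with
// addMemOperand(MMO) on the corresponding MachineInstrBuilder.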
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
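A minimal sketch of the fixed/scalable split (the byte counts are illustrative):
StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/2);
int64_t FixedPart = Off.getFixed();       // 16 bytes
int64_t ScalablePart = Off.getScalable(); // 2 bytes, multiplied by vscale at runtime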
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
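A minimal sketch: inversion maps a condition to its logical complement (for example EQ <-> NE):
AArch64CC::CondCode CC = AArch64CC::EQ;
AArch64CC::CondCode Inverted = AArch64CC::getInvertedCondCode(CC); // AArch64CC::NE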
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
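A minimal round-trip sketch; 0x00FF00FF00FF00FF is a repeating 16-bit pattern, so it is representable as a 64-bit logical immediate:
uint64_t Imm = 0x00FF00FF00FF00FFULL;
if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
  assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == Imm);
}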
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
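A minimal sketch that packs a shift descriptor and reads it back:
unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
assert(AArch64_AM::getShiftType(ShiftImm) == AArch64_AM::LSL);
assert(AArch64_AM::getShiftValue(ShiftImm) == 12);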
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
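A minimal sketch of the builder idiom, assuming MBB, MBBI, DL, TII, DestReg, and SrcReg already exist; the ADDXri opcode and the #4 immediate are illustrative:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
    .addReg(SrcReg)
    .addImm(4)   // unsigned 12-bit immediate
    .addImm(0);  // shift amount (lsl #0)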
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
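For example, a signed 8-bit field covers [-128, 127]:
static_assert(isInt<8>(127) && !isInt<8>(128));
static_assert(isInt<8>(-128) && !isInt<8>(-129));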
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
@ Renamable
Register that may be renamed.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
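A minimal sketch, assuming MBB, MBBI, DL, and TII already exist; the 32-byte adjustment is illustrative:
// Materialize "sub sp, sp, #32" as part of the prologue.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::getFixed(-32), TII, MachineInstr::FrameSetup);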
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static MCRegister getXRegFromWReg(MCRegister Reg)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2182
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
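For example, treating the low 12 bits of a value as a signed quantity:
static_assert(SignExtend64<12>(0x800) == -2048);
static_assert(SignExtend64<12>(0x7FF) == 2047);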
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
constexpr RegState getUndefRegState(bool B)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.