//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

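// As one illustrative example (a sketch only; the exact sequence is chosen by
// AArch64_IMM::expandMOVImm below), a pseudo such as
//
//   %x0 = MOVi64imm 0x123456789abcdef0
//
// is rewritten by this pass into real move-wide instructions:
//
//   movz x0, #0xdef0
//   movk x0, #0x9abc, lsl #16
//   movk x0, #0x5678, lsl #32
//   movk x0, #0x1234, lsl #48
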
#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}
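
// For illustration (with a hypothetical pseudo): given
//   %x0 = SOMEPSEUDO %x1, implicit $x2, implicit-def dead $nzcv
// whose MCInstrDesc declares two static operands, the trailing implicit use
// of $x2 is attached to UseMI and the implicit def of $nzcv to DefMI.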

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg, RegState::Define |
                                             getDeadRegState(DstIsDead && LastItem) |
                                             RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

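/// Expand a CMP_SWAP_{8,16,32,64} pseudo into a load-exclusive /
/// store-exclusive retry loop; see the .Lloadcmp / .Lstore block comments
/// below for the emitted control flow. The concrete load-acquire,
/// store-release and compare opcodes are supplied by the caller in expandMI.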
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

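/// Expand a 128-bit CMP_SWAP_128* pseudo into an LDXP/STXP retry loop. Note
/// the extra failure block: it stores the just-loaded value back with the
/// paired store-exclusive, since an LDXP read on its own is not guaranteed
/// to be a single-copy-atomic 128-bit read unless a matching store-exclusive
/// succeeds.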
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies that the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used); otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI.getInstrIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

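/// Expand STGloop_wback / STZGloop_wback into a post-indexed ST(Z)2G tagging
/// loop that tags 32 bytes per iteration. When the total size is not a
/// multiple of 32, a single leading 16-byte ST(Z)G brings it into range; for
/// example, Size == 48 emits one STG followed by one loop iteration.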
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

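/// Expand an SVE multi-register spill/fill pseudo (e.g. STR_ZZZZXI /
/// LDR_ZZZZXI) into N consecutive STR_ZXI / LDR_ZXI instructions, one per
/// zsub0..zsub<N-1> sub-register of the tuple, incrementing the
/// (vector-scaled) immediate offset by one for each part.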
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 2;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.

  MachineInstr &MI = *MBBI;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  MachineInstr *Call =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  Call->addOperand(CallTarget);
  Call->setCFIType(*MBB.getParent(), MI.getCFIType());
  Call->copyImplicitOps(*MBB.getParent(), MI);

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

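/// Expand RestoreZAPseudo by splitting the block: a CBZX on the saved
/// TPIDR2_EL0 value branches to a small block holding the restore call (taken
/// when the value is zero), while a non-zero value skips straight to the code
/// after the pseudo.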
MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of an smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, %0, 0, <regmask>            <- Conditional SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, %0, 0, <regmask>            <- Conditional SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>                 <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>            <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not the
  // expected value for the callee (0 for a normal callee and 1 for a streaming
  // callee).
  auto PStateSM = MI.getOperand(2).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  bool IsStreamingCallee = MI.getOperand(3).getImm();
  unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

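/// Expand an SME2 multi-vector load pseudo into its real opcode. The register
/// allocator may have assigned the destination tuple either to a contiguous
/// or to a strided multi-vector register class; pick the contiguous or
/// strided instruction accordingly.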
bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOpc, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOpc;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

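  // The following are plain register-register ALU aliases; each one is
  // expanded to its shifted-register form with an LSL #0 shift operand,
  // which is the operand layout the real instruction encodings expect.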
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs;  break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs;  break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs;  break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs;  break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs;  break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs;  break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs;  break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs;  break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs;  break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs;  break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs;  break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs;  break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs;  break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs;  break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs;  break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs;  break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    llvm_unreachable("Non-writeback variants of STGloop / STZGloop should not "
                     "survive past PrologEpilogInserter.");
1490 case AArch64::STR_ZZZZXI:
1491 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1492 case AArch64::STR_ZZZXI:
1493 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1494 case AArch64::STR_ZZXI:
1495 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1496 case AArch64::LDR_ZZZZXI:
1497 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1498 case AArch64::LDR_ZZZXI:
1499 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1500 case AArch64::LDR_ZZXI:
1501 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1502 case AArch64::BLR_RVMARKER:
1503 return expandCALL_RVMARKER(MBB, MBBI);
1504 case AArch64::BLR_BTI:
1505 return expandCALL_BTI(MBB, MBBI);
1506 case AArch64::StoreSwiftAsyncContext:
1507 return expandStoreSwiftAsyncContext(MBB, MBBI);
1508 case AArch64::RestoreZAPseudo: {
1509 auto *NewMBB = expandRestoreZA(MBB, MBBI);
1510 if (NewMBB != &MBB)
1511 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1512 return true;
1513 }
1514 case AArch64::MSRpstatePseudo: {
1515 auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1516 if (NewMBB != &MBB)
1517 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1518 return true;
1519 }
1520 case AArch64::LD1B_2Z_IMM_PSEUDO:
1521 return expandMultiVecPseudo(
1522 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1523 AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1524 case AArch64::LD1H_2Z_IMM_PSEUDO:
1525 return expandMultiVecPseudo(
1526 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1527 AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1528 case AArch64::LD1W_2Z_IMM_PSEUDO:
1529 return expandMultiVecPseudo(
1530 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1531 AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1532 case AArch64::LD1D_2Z_IMM_PSEUDO:
1533 return expandMultiVecPseudo(
1534 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1535 AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1536 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1537 return expandMultiVecPseudo(
1538 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1539 AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1540 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1541 return expandMultiVecPseudo(
1542 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1543 AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1544 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1545 return expandMultiVecPseudo(
1546 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1547 AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1548 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1549 return expandMultiVecPseudo(
1550 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1551 AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1552 case AArch64::LD1B_2Z_PSEUDO:
1553 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1554 AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1555 AArch64::LD1B_2Z_STRIDED);
1556 case AArch64::LD1H_2Z_PSEUDO:
1557 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1558 AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1559 AArch64::LD1H_2Z_STRIDED);
1560 case AArch64::LD1W_2Z_PSEUDO:
1561 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1562 AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1563 AArch64::LD1W_2Z_STRIDED);
1564 case AArch64::LD1D_2Z_PSEUDO:
1565 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1566 AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1567 AArch64::LD1D_2Z_STRIDED);
1568 case AArch64::LDNT1B_2Z_PSEUDO:
1569 return expandMultiVecPseudo(
1570 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1571 AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1572 case AArch64::LDNT1H_2Z_PSEUDO:
1573 return expandMultiVecPseudo(
1574 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1575 AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1576 case AArch64::LDNT1W_2Z_PSEUDO:
1577 return expandMultiVecPseudo(
1578 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1579 AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1580 case AArch64::LDNT1D_2Z_PSEUDO:
1581 return expandMultiVecPseudo(
1582 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1583 AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1584 case AArch64::LD1B_4Z_IMM_PSEUDO:
1585 return expandMultiVecPseudo(
1586 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1587 AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1588 case AArch64::LD1H_4Z_IMM_PSEUDO:
1589 return expandMultiVecPseudo(
1590 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1591 AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1592 case AArch64::LD1W_4Z_IMM_PSEUDO:
1593 return expandMultiVecPseudo(
1594 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1595 AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1596 case AArch64::LD1D_4Z_IMM_PSEUDO:
1597 return expandMultiVecPseudo(
1598 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1599 AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1600 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1601 return expandMultiVecPseudo(
1602 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1603 AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1604 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1605 return expandMultiVecPseudo(
1606 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1607 AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1608 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1609 return expandMultiVecPseudo(
1610 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1611 AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1612 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1613 return expandMultiVecPseudo(
1614 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1615 AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1616 case AArch64::LD1B_4Z_PSEUDO:
1617 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1618 AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1619 AArch64::LD1B_4Z_STRIDED);
1620 case AArch64::LD1H_4Z_PSEUDO:
1621 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1622 AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1623 AArch64::LD1H_4Z_STRIDED);
1624 case AArch64::LD1W_4Z_PSEUDO:
1625 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1626 AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1627 AArch64::LD1W_4Z_STRIDED);
1628 case AArch64::LD1D_4Z_PSEUDO:
1629 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1630 AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1631 AArch64::LD1D_4Z_STRIDED);
1632 case AArch64::LDNT1B_4Z_PSEUDO:
1633 return expandMultiVecPseudo(
1634 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1635 AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1636 case AArch64::LDNT1H_4Z_PSEUDO:
1637 return expandMultiVecPseudo(
1638 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1639 AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1640 case AArch64::LDNT1W_4Z_PSEUDO:
1641 return expandMultiVecPseudo(
1642 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1643 AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1644 case AArch64::LDNT1D_4Z_PSEUDO:
1645 return expandMultiVecPseudo(
1646 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1647 AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1648 }
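  // Not a pseudo instruction this pass expands; leave it untouched.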
1649 return false;
1650}
1651
1652/// Iterate over the instructions in basic block MBB and expand any
1653/// pseudo instructions. Return true if anything was modified.
1654bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1655 bool Modified = false;
1656
1657 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1658 while (MBBI != E) {
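    // Grab the next iterator up front: expandMI may erase *MBBI, and it may
    // also update NMBBI when an expansion splits the block.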
1659 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1660 Modified |= expandMI(MBB, MBBI, NMBBI);
1661 MBBI = NMBBI;
1662 }
1663
1664 return Modified;
1665}
1666
1667bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
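  // Cache the instruction info. Only an AArch64 subtarget can reach this
  // pass, so the static_cast below is safe.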
1668 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1669
1670 bool Modified = false;
1671 for (auto &MBB : MF)
1672 Modified |= expandMBB(MBB);
1673 return Modified;
1674}
1675
1676/// Returns an instance of the pseudo instruction expansion pass.
1678 return new AArch64ExpandPseudo();
1679}
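// Illustrative sketch (not part of this file): the pass is typically added
// to the AArch64 codegen pipeline after register allocation, e.g. from
// AArch64PassConfig::addPreSched2() in AArch64TargetMachine.cpp:
//
//   void AArch64PassConfig::addPreSched2() {
//     // Expand pseudos before the post-RA scheduler sees the code.
//     addPass(createAArch64ExpandPseudoPass());
//     ...
//   }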