LLVM 22.0.0git
MachineSMEABIPass.cpp
Go to the documentation of this file.
1//===- MachineSMEABIPass.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the SME ABI requirements for ZA state. This includes
10// implementing the lazy (and agnostic) ZA state save schemes around calls.
11//
12//===----------------------------------------------------------------------===//
13//
14// This pass works by collecting instructions that require ZA to be in a
15// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16// transitions to ensure ZA is in the required state before instructions. State
17// transitions represent actions such as setting up or restoring a lazy save.
18// Certain points within a function may also have predefined states independent
19// of any instructions, for example, a "shared_za" function is always entered
20// and exited in the "ACTIVE" state.
21//
22// To handle ZA state across control flow, we make use of edge bundling. This
23// assigns each block an "incoming" and "outgoing" edge bundle (representing
24// incoming and outgoing edges). Initially, these are unique to each block;
25// then, in the process of forming bundles, the outgoing bundle of a block is
26// joined with the incoming bundle of all successors. The result is that each
27// bundle can be assigned a single ZA state, which ensures the state required by
28// all a blocks' successors is the same, and that each basic block will always
29// be entered with the same ZA state. This eliminates the need for splitting
30// edges to insert state transitions or "phi" nodes for ZA states.
31//
32// See below for a simple example of edge bundling.
33//
34// The following shows a conditionally executed basic block (BB1):
35//
36// if (cond)
37// BB1
38// BB2
39//
40// Initial Bundles Joined Bundles
41//
42// ┌──0──┐ ┌──0──┐
43// │ BB0 │ │ BB0 │
44// └──1──┘ └──1──┘
45// ├───────┐ ├───────┐
46// ▼ │ ▼ │
47// ┌──2──┐ │ ─────► ┌──1──┐ │
48// │ BB1 │ ▼ │ BB1 │ ▼
49// └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐
50// └───►4 BB2 │ └───►1 BB2 │
51// └──5──┘ └──2──┘
52//
53// On the left are the initial per-block bundles, and on the right are the
54// joined bundles (which are the result of the EdgeBundles analysis).
55
56#include "AArch64InstrInfo.h"
58#include "AArch64Subtarget.h"
68
69using namespace llvm;
70
71#define DEBUG_TYPE "aarch64-machine-sme-abi"
72
73namespace {
74
/// The states ZA (and ZT0) may be in at a given program point. Enumerators
/// start at 0 so states can index per-state count arrays (see
/// NUM_ZA_STATE-sized arrays in assignBundleZAStates/propagateDesiredStates).
enum ZAState {
  // Any/unknown state (not valid)
  ANY = 0,

  // ZA is in use and active (i.e. within the accumulator)
  ACTIVE,

  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
  LOCAL_SAVED,

  // ZA is off or a lazy save has been set up by the caller
  CALLER_DORMANT,

  // ZA is off
  OFF,

  // The number of ZA states (not a valid state)
  NUM_ZA_STATE
};
94
/// A bitmask enum to record live physical registers that the "emit*" routines
/// may need to preserve. Note: This only tracks registers we may clobber.
/// W0 and W0_HI are separate bits so that a live W0 (with W0_HI dead) is not
/// needlessly widened to a full X0 save/restore.
enum LiveRegs : uint8_t {
  None = 0,
  NZCV = 1 << 0,
  W0 = 1 << 1,
  W0_HI = 1 << 2,
  X0 = W0 | W0_HI,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
};
105
/// Holds the virtual registers live physical registers have been saved to.
struct PhysRegSave {
  // The set of physical registers that were live (and saved).
  LiveRegs PhysLiveRegs;
  // Virtual register holding the saved NZCV flags (read via MRS), if live.
  Register StatusFlags = AArch64::NoRegister;
  // Virtual register holding the saved W0 (or X0 if W0_HI was live).
  Register X0Save = AArch64::NoRegister;
};
112
/// Contains the needed ZA state (and live registers) at an instruction. That is
/// the state ZA must be in _before_ "InsertPt".
struct InstInfo {
  // ZA state required before the instruction (ANY if unconstrained).
  ZAState NeededState{ZAState::ANY};
  // Tracked physical registers (NZCV/W0/W0_HI) live before the instruction.
  LiveRegs PhysLiveRegs = LiveRegs::None;
};
120
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
  // Predetermined entry state: CALLER_DORMANT for the entry block of a
  // private-ZA function, LOCAL_SAVED for EH pads; ANY if unconstrained.
  ZAState FixedEntryState{ZAState::ANY};
  // State that avoids a transition on entry (first recorded inst's state).
  ZAState DesiredIncomingState{ZAState::ANY};
  // State that avoids a transition on exit (last recorded inst's state).
  ZAState DesiredOutgoingState{ZAState::ANY};
  // Tracked physical registers live at block entry (first non-PHI point).
  LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
  // Tracked physical registers live at block exit (first terminator point).
  LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
131
/// Contains the needed ZA state information for all blocks within a function.
struct FunctionInfo {
  // Iterator just after the SMEStateAllocPseudo marker (if present) — a safe
  // point to insert TPIDR2 block / save buffer setup.
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
  // Tracked physical registers live at AfterSMEProloguePt.
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
};
138
139/// State/helpers that is only needed when emitting code to handle
140/// saving/restoring ZA.
141class EmitContext {
142public:
143 EmitContext() = default;
144
145 /// Get or create a TPIDR2 block in \p MF.
146 int getTPIDR2Block(MachineFunction &MF) {
147 if (TPIDR2BlockFI)
148 return *TPIDR2BlockFI;
149 MachineFrameInfo &MFI = MF.getFrameInfo();
150 TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false);
151 return *TPIDR2BlockFI;
152 }
153
154 /// Get or create agnostic ZA buffer pointer in \p MF.
155 Register getAgnosticZABufferPtr(MachineFunction &MF) {
156 if (AgnosticZABufferPtr != AArch64::NoRegister)
157 return AgnosticZABufferPtr;
158 Register BufferPtr =
159 MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
160 AgnosticZABufferPtr =
161 BufferPtr != AArch64::NoRegister
162 ? BufferPtr
163 : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
164 return AgnosticZABufferPtr;
165 }
166
167 /// Returns true if the function must allocate a ZA save buffer on entry. This
168 /// will be the case if, at any point in the function, a ZA save was emitted.
169 bool needsSaveBuffer() const {
170 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
171 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
172 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
173 }
174
175private:
176 std::optional<int> TPIDR2BlockFI;
177 Register AgnosticZABufferPtr = AArch64::NoRegister;
178};
179
180/// Checks if \p State is a legal edge bundle state. For a state to be a legal
181/// bundle state, it must be possible to transition from it to any other bundle
182/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
183/// as you can transition between those states by saving/restoring ZA. The OFF
184/// state would not be legal, as transitioning to it drops the content of ZA.
185static bool isLegalEdgeBundleZAState(ZAState State) {
186 switch (State) {
187 case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
188 case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
189 return true;
190 default:
191 return false;
192 }
193}
194
195StringRef getZAStateString(ZAState State) {
196#define MAKE_CASE(V) \
197 case V: \
198 return #V;
199 switch (State) {
200 MAKE_CASE(ZAState::ANY)
201 MAKE_CASE(ZAState::ACTIVE)
202 MAKE_CASE(ZAState::LOCAL_SAVED)
203 MAKE_CASE(ZAState::CALLER_DORMANT)
204 MAKE_CASE(ZAState::OFF)
205 default:
206 llvm_unreachable("Unexpected ZAState");
207 }
208#undef MAKE_CASE
209}
210
211static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
212 const MachineOperand &MO) {
213 if (!MO.isReg() || !MO.getReg().isPhysical())
214 return false;
215 return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
216 return AArch64::MPR128RegClass.contains(SR) ||
217 AArch64::ZTRRegClass.contains(SR);
218 });
219}
220
/// Returns the required ZA state needed before \p MI and an iterator pointing
/// to where any code required to change the ZA state should be inserted.
static std::pair<ZAState, MachineBasicBlock::iterator>
getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
                     bool ZAOffAtReturn) {

  // ZA-using pseudos need ZA active; the transition is inserted before the
  // preceding instruction (the caller asserts this is an ADJCALLSTACKDOWN).
  if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
    return {ZAState::ACTIVE, std::prev(InsertPt)};

  if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
    return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};

  // Returns leave ZA off for private-ZA functions, active otherwise.
  if (MI.isReturn())
    return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};

  // Any direct use/def of a ZA tile or ZT0 register requires ZA active.
  for (auto &MO : MI.operands()) {
    if (isZAorZTRegOp(TRI, MO))
      return {ZAState::ACTIVE, InsertPt};
  }

  return {ZAState::ANY, InsertPt};
}
244
/// Machine pass implementing the SME ABI for ZA state (lazy/agnostic saves).
struct MachineSMEABI : public MachineFunctionPass {
  // Pass identification.
  inline static char ID = 0;

  MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
      : MachineFunctionPass(ID), OptLevel(OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "Machine SME ABI pass"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  /// Collects the needed ZA state (and live registers) before each instruction
  /// within the machine function.
  FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);

  /// Assigns each edge bundle a ZA state based on the needed states of blocks
  /// that have incoming or outgoing edges in that bundle.
  SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
                                            const FunctionInfo &FnInfo);

  /// Inserts code to handle changes between ZA states within the function.
  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
  void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
                          const EdgeBundles &Bundles,
                          ArrayRef<ZAState> BundleStates);

  /// Propagates desired states forwards (from predecessors -> successors) if
  /// \p Forwards, otherwise, propagates backwards (from successors ->
  /// predecessors).
  void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);

  // Emission routines for private and shared ZA functions (using lazy saves).
  void emitNewZAPrologue(MachineBasicBlock &MBB,
  void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
  void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                  bool ClearTPIDR2);

  // Emission routines for agnostic ZA functions.
  void emitSetupFullZASave(MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  // Emit a "full" ZA save or restore. It is "full" in the sense that this
  // function will emit a call to __arm_sme_save or __arm_sme_restore, which
  // handles saving and restoring both ZA and ZT0.
  void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
                             LiveRegs PhysLiveRegs, bool IsSave);
  void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                    LiveRegs PhysLiveRegs);

  /// Attempts to find an insertion point before \p Inst where the status flags
  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
  /// the block is found.
  std::pair<MachineBasicBlock::iterator, LiveRegs>
  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
  void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, ZAState From,
                       ZAState To, LiveRegs PhysLiveRegs);

  // Helpers for switching between lazy/full ZA save/restore routines.
  // Agnostic-ZA functions use the full __arm_sme_save/restore scheme;
  // everything else uses the TPIDR2-based lazy-save scheme.
  void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/true);
    return emitSetupLazySave(Context, MBB, MBBI);
  }
  void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/false);
    return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
  }
  void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
                                LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
    return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
  }

  /// Save live physical registers to virtual registers.
  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
  /// Restore physical registers from a save of their previous values.
  void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,

private:
  // Cached per-function pointers; set up at the start of runOnMachineFunction.
  MachineFunction *MF = nullptr;
  const AArch64Subtarget *Subtarget = nullptr;
  const AArch64RegisterInfo *TRI = nullptr;
  const AArch64FunctionInfo *AFI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
};
361
362static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
363 LiveRegs PhysLiveRegs = LiveRegs::None;
364 if (!LiveUnits.available(AArch64::NZCV))
365 PhysLiveRegs |= LiveRegs::NZCV;
366 // We have to track W0 and X0 separately as otherwise things can get
367 // confused if we attempt to preserve X0 but only W0 was defined.
368 if (!LiveUnits.available(AArch64::W0))
369 PhysLiveRegs |= LiveRegs::W0;
370 if (!LiveUnits.available(AArch64::W0_HI))
371 PhysLiveRegs |= LiveRegs::W0_HI;
372 return PhysLiveRegs;
373}
374
375static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
376 if (PhysLiveRegs & LiveRegs::NZCV)
377 LiveUnits.addReg(AArch64::NZCV);
378 if (PhysLiveRegs & LiveRegs::W0)
379 LiveUnits.addReg(AArch64::W0);
380 if (PhysLiveRegs & LiveRegs::W0_HI)
381 LiveUnits.addReg(AArch64::W0_HI);
382}
383
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
  assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
          SMEFnAttrs.hasZAState()) &&
         "Expected function to have ZA/ZT0 state!");

  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;

  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo &Block = Blocks[MBB.getNumber()];

    if (MBB.isEntryBlock()) {
      // Entry block:
      Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface()
                                  ? ZAState::CALLER_DORMANT
                                  : ZAState::ACTIVE;
    } else if (MBB.isEHPad()) {
      // EH entry block:
      Block.FixedEntryState = ZAState::LOCAL_SAVED;
    }

    LiveRegUnits LiveUnits(*TRI);
    LiveUnits.addLiveOuts(MBB);

    // Walk the block bottom-up so liveness can be stepped backwards per-MI.
    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
    auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
    auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
    for (MachineInstr &MI : reverse(MBB)) {
      LiveUnits.stepBackward(MI);
      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
      // The SMEStateAllocPseudo marker is added to a function if the save
      // buffer was allocated in SelectionDAG. It marks the end of the
      // allocation -- which is a safe point for this pass to insert any TPIDR2
      // block setup.
      if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
        AfterSMEProloguePt = MBBI;
        PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
      }
      // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
      auto [NeededState, InsertPt] = getZAStateBeforeInst(
          *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
      assert((InsertPt == MBBI ||
              InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
             "Unexpected state change insertion point!");
      // TODO: Do something to avoid state changes where NZCV is live.
      if (MBBI == FirstTerminatorInsertPt)
        Block.PhysLiveRegsAtExit = PhysLiveRegs;
      if (MBBI == FirstNonPhiInsertPt)
        Block.PhysLiveRegsAtEntry = PhysLiveRegs;
      if (NeededState != ZAState::ANY)
        Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
    }

    // Reverse vector (as we had to iterate backwards for liveness).
    std::reverse(Block.Insts.begin(), Block.Insts.end());

    // Record the desired states on entry/exit of this block. These are the
    // states that would not incur a state transition.
    if (!Block.Insts.empty()) {
      Block.DesiredIncomingState = Block.Insts.front().NeededState;
      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
    }
  }

  return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
                      PhysLiveRegsAfterSMEPrologue};
}
453
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
                                           bool Forwards) {
  // If `Forwards`, this propagates desired states from predecessors to
  // successors, otherwise, this propagates states from successors to
  // predecessors.
  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
  };

  // Seed the worklist with every block whose relevant desired state is not a
  // legal bundle state (i.e. still ANY/OFF/CALLER_DORMANT).
  for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
      Worklist.push_back(MF->getBlockNumbered(BlockID));
  }

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];

    // Pick a legal edge bundle state that matches the majority of
    // predecessors/successors.
    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (MachineBasicBlock *PredOrSucc :
         Forwards ? predecessors(MBB) : successors(MBB)) {
      BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
      // NOTE(review): this local shadows the enum type name `ZAState`.
      ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
      if (isLegalEdgeBundleZAState(ZAState))
        StateCounts[ZAState]++;
    }

    // max_element returns the first maximum, so ties resolve to the
    // lowest-numbered state.
    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
    ZAState &CurrentState = GetBlockState(Block, Forwards);
    if (PropagatedState != CurrentState) {
      CurrentState = PropagatedState;
      ZAState &OtherState = GetBlockState(Block, !Forwards);
      // Propagate to the incoming/outgoing state if that is also "ANY".
      if (OtherState == ZAState::ANY)
        OtherState = PropagatedState;
      // Push any successors/predecessors that may need updating to the
      // worklist.
      for (MachineBasicBlock *SuccOrPred :
           Forwards ? successors(MBB) : predecessors(MBB)) {
        BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
        if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
          Worklist.push_back(SuccOrPred);
      }
    }
  }
}
503
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
                                    const FunctionInfo &FnInfo) {
  SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
  for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');

    // Attempt to assign a ZA state for this bundle that minimizes state
    // transitions. Edges within loops are given a higher weight as we assume
    // they will be executed more than once.
    // NOTE(review): no loop-based weighting is visible below (each edge counts
    // as 1) — confirm whether the comment above is stale.
    int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (unsigned BlockID : Bundles.getBlocks(I)) {
      LLVM_DEBUG(dbgs() << "- bb." << BlockID);

      const BlockInfo &Block = FnInfo.Blocks[BlockID];
      // A block can touch this bundle via its incoming edge, outgoing edge,
      // or both.
      bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
      bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;

      bool LegalInEdge =
          InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
      bool LegalOutEgde =
          OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
      if (LegalInEdge) {
        LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
                          << getZAStateString(Block.DesiredIncomingState));
        EdgeStateCounts[Block.DesiredIncomingState]++;
      }
      if (LegalOutEgde) {
        LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
                          << getZAStateString(Block.DesiredOutgoingState));
        EdgeStateCounts[Block.DesiredOutgoingState]++;
      }
      if (!LegalInEdge && !LegalOutEgde)
        LLVM_DEBUG(dbgs() << " (no state preference)");
      LLVM_DEBUG(dbgs() << '\n');
    }

    // Pick the most requested state; with no votes this is ANY (index 0),
    // which defaults to ACTIVE below.
    ZAState BundleState =
        ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);

    if (BundleState == ZAState::ANY)
      BundleState = ZAState::ACTIVE;

    LLVM_DEBUG({
      dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
             << "Edge counts:";
      for (auto [State, Count] : enumerate(EdgeStateCounts))
        dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
      dbgs() << "\n\n";
    });

    BundleStates[I] = BundleState;
  }

  return BundleStates;
}
562
std::pair<MachineBasicBlock::iterator, LiveRegs>
MachineSMEABI::findStateChangeInsertionPoint(
    MachineBasicBlock &MBB, const BlockInfo &Block,
  LiveRegs PhysLiveRegs;
  // Default insertion point: before \p Inst, or before the block's first
  // terminator when Inst is Block.Insts.end().
  if (Inst != Block.Insts.end()) {
    InsertPt = Inst->InsertPt;
    PhysLiveRegs = Inst->PhysLiveRegs;
  } else {
    InsertPt = MBB.getFirstTerminator();
    PhysLiveRegs = Block.PhysLiveRegsAtExit;
  }

  if (!(PhysLiveRegs & LiveRegs::NZCV))
    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).

  // Find the previous state change. We can not move before this point.
  MachineBasicBlock::iterator PrevStateChangeI;
  if (Inst == Block.Insts.begin()) {
    PrevStateChangeI = MBB.begin();
  } else {
    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
    // InstInfo object for instructions that require a specific ZA state, so the
    // InstInfo is the site of the previous state change in the block (which can
    // be several MIs earlier).
    PrevStateChangeI = std::prev(Inst)->InsertPt;
  }

  // Walk backwards from the default point, looking for a spot where NZCV is
  // dead so the state change does not need to save/restore the flags.
  // Note: LiveUnits will only accurately track X0 and NZCV.
  LiveRegUnits LiveUnits(*TRI);
  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
    // Don't move before/into a call (which may have a state change before it).
    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
      break;
    LiveUnits.stepBackward(*I);
    if (LiveUnits.available(AArch64::NZCV))
      return {I, getPhysLiveRegs(LiveUnits)};
  }
  return {InsertPt, PhysLiveRegs};
}
605
606void MachineSMEABI::insertStateChanges(EmitContext &Context,
607 const FunctionInfo &FnInfo,
608 const EdgeBundles &Bundles,
609 ArrayRef<ZAState> BundleStates) {
610 for (MachineBasicBlock &MBB : *MF) {
611 const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
612 ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
613 /*Out=*/false)];
614
615 ZAState CurrentState = Block.FixedEntryState;
616 if (CurrentState == ZAState::ANY)
617 CurrentState = InState;
618
619 for (auto &Inst : Block.Insts) {
620 if (CurrentState != Inst.NeededState) {
621 auto [InsertPt, PhysLiveRegs] =
622 findStateChangeInsertionPoint(MBB, Block, &Inst);
623 emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
624 PhysLiveRegs);
625 CurrentState = Inst.NeededState;
626 }
627 }
628
629 if (MBB.succ_empty())
630 continue;
631
632 ZAState OutState =
633 BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
634 if (CurrentState != OutState) {
635 auto [InsertPt, PhysLiveRegs] =
636 findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
637 emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
638 PhysLiveRegs);
639 }
640 }
641}
642
645 if (MBBI != MBB.end())
646 return MBBI->getDebugLoc();
647 return DebugLoc();
648}
649
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,

  // Get pointer to TPIDR2 block.
  Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
  Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  // ADDXri with two zero immediates materializes the frame address.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // Copy into a plain GPR64 class for the MSR operand.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
      .addReg(TPIDR2);
  // Set TPIDR2_EL0 to point to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(TPIDR2Ptr);
}
669
PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
                                             DebugLoc DL) {
  PhysRegSave RegSave{PhysLiveRegs};
  if (PhysLiveRegs & LiveRegs::NZCV) {
    // Save the status flags by reading NZCV into a GPR via MRS.
    RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit);
  }
  // Note: Preserving X0 is "free" as this is before register allocation, so
  // the register allocator is still able to optimize these copies.
  if (PhysLiveRegs & LiveRegs::W0) {
    // Save the full X0 only if the top half (W0_HI) is also live.
    RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
                                                    ? &AArch64::GPR64RegClass
                                                    : &AArch64::GPR32RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
        .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
  }
  return RegSave;
}
692
void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
                                      DebugLoc DL) {
  // Write the saved flags back to NZCV via MSR.
  if (RegSave.StatusFlags != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(RegSave.StatusFlags)
        .addReg(AArch64::NZCV, RegState::ImplicitDefine);

  // Restore W0 (or the full X0 if the top half was live too).
  if (RegSave.X0Save != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
            RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
        .addReg(RegSave.X0Save);
}
708
void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
                                        LiveRegs PhysLiveRegs) {
  auto *TLI = Subtarget->getTargetLowering();
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  // X0 is the argument register for the TPIDR2 restore routine.
  Register TPIDR2 = AArch64::X0;

  // TODO: Emit these within the restore MBB to prevent unnecessary saves.
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Enable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
  // Get current TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // Get pointer to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // (Conditionally) restore ZA state. The pseudo takes the current TPIDR2_EL0
  // value and the restore routine; its expansion decides whether the restore
  // call is needed.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
      .addReg(TPIDR2EL0)
      .addReg(TPIDR2)
      .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE))
      .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  // Zero TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
746
void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
                              bool ClearTPIDR2) {

  // Clear any pending lazy save by zeroing TPIDR2_EL0.
  if (ClearTPIDR2)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::TPIDR2_EL0)
        .addReg(AArch64::XZR);

  // Disable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(0);
}
762
void MachineSMEABI::emitAllocateLazySaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
  MachineFrameInfo &MFI = MF->getFrameInfo();
  Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register Buffer = AFI->getEarlyAllocSMESaveBuffer();

  // Calculate SVL.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);

  // 1. Allocate the lazy save buffer.
  if (Buffer == AArch64::NoRegister) {
    // TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
    // Buffer != AArch64::NoRegister). This is done to reuse the existing
    // expansions (which can insert stack checks). This works, but it means we
    // will always allocate the lazy save buffer (even if the function contains
    // no lazy saves). If we want to handle Windows here, we'll need to
    // implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
    assert(!Subtarget->isTargetWindows() &&
           "Lazy ZA save is not yet supported on Windows");
    Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    // Get original stack pointer.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
        .addReg(AArch64::SP);
    // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
    // (MSUB computes Buffer = SP - SVL * SVL).
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
        .addReg(SVL)
        .addReg(SVL)
        .addReg(SP);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
        .addReg(Buffer);
    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  // 2. Setup the TPIDR2 block.
  {
    // Note: This case just needs to do `SVL << 48`. It is not implemented as we
    // generally don't support big-endian SVE/SME.
    if (!Subtarget->isLittleEndian())
          "TPIDR2 block initialization is not supported on big-endian targets");

    // Store buffer pointer and num_za_save_slices.
    // Bytes 10-15 are implicitly zeroed.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
        .addReg(Buffer)
        .addReg(SVL)
        .addFrameIndex(Context.getTPIDR2Block(*MF))
        .addImm(0);
  }
}
817
void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
  auto *TLI = Subtarget->getTargetLowering();

  // Get current TPIDR2_EL0.
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS))
      .addReg(TPIDR2EL0, RegState::Define)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // If TPIDR2_EL0 is non-zero, commit the lazy save.
  // NOTE: Functions that only use ZT0 don't need to zero ZA.
  bool ZeroZA = AFI->getSMEFnAttrs().hasZAState();
  auto CommitZASave =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo))
          .addReg(TPIDR2EL0)
          .addImm(ZeroZA ? 1 : 0)
          .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
          .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  // Zeroing ZA defines the tiles — model that with an implicit def of ZAB0.
  if (ZeroZA)
    CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
  // Enable ZA (as ZA could have previously been in the OFF state).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
}
844
void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
                                          LiveRegs PhysLiveRegs, bool IsSave) {
  auto *TLI = Subtarget->getTargetLowering();
  // X0 carries the buffer pointer argument for the support routines.
  Register BufferPtr = AArch64::X0;

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Copy the buffer pointer into X0.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
      .addReg(Context.getAgnosticZABufferPtr(*MF));

  // Call __arm_sme_save/__arm_sme_restore.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
      .addReg(BufferPtr, RegState::Implicit)
      .addExternalSymbol(TLI->getLibcallName(
          IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
      .addRegMask(TRI->getCallPreservedMask(
          *MF,

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
870
void MachineSMEABI::emitAllocateFullZASaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
  // Buffer already allocated in SelectionDAG.
    return;

  Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
  Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Calculate the SME state size.
  {
    // Call __arm_sme_state_size; the result is returned in X0.
    auto *TLI = Subtarget->getTargetLowering();
    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_SME_STATE_SIZE))
        .addReg(AArch64::X0, RegState::ImplicitDefine)
        .addRegMask(TRI->getCallPreservedMask(
            *MF, CallingConv::
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
        .addReg(AArch64::X0);
  }

  // Allocate a buffer object of the size given __arm_sme_state_size.
  {
    MachineFrameInfo &MFI = MF->getFrameInfo();
    // Grow the stack downwards by BufferSize and use the new SP as the buffer.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP)
        .addReg(BufferSize)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
        .addReg(AArch64::SP);

    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
914
915void MachineSMEABI::emitStateChange(EmitContext &Context,
918 ZAState From, ZAState To,
919 LiveRegs PhysLiveRegs) {
920 // ZA not used.
921 if (From == ZAState::ANY || To == ZAState::ANY)
922 return;
923
924 // If we're exiting from the CALLER_DORMANT state that means this new ZA
925 // function did not touch ZA (so ZA was never turned on).
926 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
927 return;
928
929 // TODO: Avoid setting up the save buffer if there's no transition to
930 // LOCAL_SAVED.
931 if (From == ZAState::CALLER_DORMANT) {
933 "CALLER_DORMANT state requires private ZA interface");
934 assert(&MBB == &MBB.getParent()->front() &&
935 "CALLER_DORMANT state only valid in entry block");
936 emitNewZAPrologue(MBB, MBB.getFirstNonPHI());
937 if (To == ZAState::ACTIVE)
938 return; // Nothing more to do (ZA is active after the prologue).
939
940 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
941 // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
942 // case by changing the placement of the zero instruction.
943 From = ZAState::ACTIVE;
944 }
945
946 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
947 emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
948 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
949 emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
950 else if (To == ZAState::OFF) {
951 assert(From != ZAState::CALLER_DORMANT &&
952 "CALLER_DORMANT to OFF should have already been handled");
954 "Should not turn ZA off in agnostic ZA function");
955 emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
956 } else {
957 dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
958 << getZAStateString(To) << '\n';
959 llvm_unreachable("Unimplemented state transition");
960 }
961}
962
963} // end anonymous namespace
964
965INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
966 false, false)
967
968bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
969 if (!MF.getSubtarget<AArch64Subtarget>().hasSME())
970 return false;
971
972 AFI = MF.getInfo<AArch64FunctionInfo>();
973 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
974 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
975 !SMEFnAttrs.hasAgnosticZAInterface())
976 return false;
977
978 assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
979
980 this->MF = &MF;
981 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
982 TII = Subtarget->getInstrInfo();
983 TRI = Subtarget->getRegisterInfo();
984 MRI = &MF.getRegInfo();
985
986 const EdgeBundles &Bundles =
987 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
988
989 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
990
991 if (OptLevel != CodeGenOptLevel::None) {
992 // Propagate desired states forward, then backwards. Most of the propagation
993 // should be done in the forward step, and backwards propagation is then
994 // used to fill in the gaps. Note: Doing both in one step can give poor
995 // results. For example, consider this subgraph:
996 //
997 // ┌─────┐
998 // ┌─┤ BB0 ◄───┐
999 // │ └─┬───┘ │
1000 // │ ┌─▼───◄──┐│
1001 // │ │ BB1 │ ││
1002 // │ └─┬┬──┘ ││
1003 // │ │└─────┘│
1004 // │ ┌─▼───┐ │
1005 // │ │ BB2 ├───┘
1006 // │ └─┬───┘
1007 // │ ┌─▼───┐
1008 // └─► BB3 │
1009 // └─────┘
1010 //
1011 // If:
1012 // - "BB0" and "BB2" (outer loop) has no state preference
1013 // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
1014 // - "BB3" desires the LOCAL_SAVED state on entry
1015 //
1016 // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
1017 // then from BB2 to BB0. Which results in the inner and outer loops having
1018 // the "ACTIVE" state. This avoids any state changes in the loops.
1019 //
1020 // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
1021 // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
1022 // in the outer loop.
1023 for (bool Forwards : {true, false})
1024 propagateDesiredStates(FnInfo, Forwards);
1025 }
1026
1027 SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
1028
1029 EmitContext Context;
1030 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
1031
1032 if (Context.needsSaveBuffer()) {
1033 if (FnInfo.AfterSMEProloguePt) {
1034 // Note: With inline stack probes the AfterSMEProloguePt may not be in the
1035 // entry block (due to the probing loop).
1036 MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
1037 emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
1038 FnInfo.PhysLiveRegsAfterSMEPrologue);
1039 } else {
1040 MachineBasicBlock &EntryBlock = MF.front();
1041 emitAllocateZASaveBuffer(
1042 Context, EntryBlock, EntryBlock.getFirstNonPHI(),
1043 FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
1044 }
1045 }
1046
1047 return true;
1048}
1049
1051 return new MachineSMEABI(OptLevel);
1052}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
#define MAKE_CASE(V)
Register const TargetRegisterInfo * TRI
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
A debug info location.
Definition DebugLoc.h:124
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition EdgeBundles.h:53
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
auto successors(const MachineBasicBlock *BB)
FunctionPass * createMachineSMEABIPass(CodeGenOptLevel)
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
@ Default
-O2, -Os
Definition CodeGen.h:85
@ LLVM_MARK_AS_BITMASK_ENUM
Definition ModRef.h:37
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2030
auto predecessors(const MachineBasicBlock *BB)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...