LLVM 22.0.0git
MachineSMEABIPass.cpp
Go to the documentation of this file.
1//===- MachineSMEABIPass.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the SME ABI requirements for ZA state. This includes
10// implementing the lazy (and agnostic) ZA state save schemes around calls.
11//
12//===----------------------------------------------------------------------===//
13//
14// This pass works by collecting instructions that require ZA to be in a
15// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16// transitions to ensure ZA is in the required state before instructions. State
17// transitions represent actions such as setting up or restoring a lazy save.
18// Certain points within a function may also have predefined states independent
19// of any instructions, for example, a "shared_za" function is always entered
20// and exited in the "ACTIVE" state.
21//
22// To handle ZA state across control flow, we make use of edge bundling. This
23// assigns each block an "incoming" and "outgoing" edge bundle (representing
24// incoming and outgoing edges). Initially, these are unique to each block;
25// then, in the process of forming bundles, the outgoing bundle of a block is
26// joined with the incoming bundle of all successors. The result is that each
27// bundle can be assigned a single ZA state, which ensures the state required by
// all of a block's successors is the same, and that each basic block will always
29// be entered with the same ZA state. This eliminates the need for splitting
30// edges to insert state transitions or "phi" nodes for ZA states.
31//
32// See below for a simple example of edge bundling.
33//
34// The following shows a conditionally executed basic block (BB1):
35//
36// if (cond)
37// BB1
38// BB2
39//
40// Initial Bundles Joined Bundles
41//
42// ┌──0──┐ ┌──0──┐
43// │ BB0 │ │ BB0 │
44// └──1──┘ └──1──┘
45// ├───────┐ ├───────┐
46// ▼ │ ▼ │
47// ┌──2──┐ │ ─────► ┌──1──┐ │
48// │ BB1 │ ▼ │ BB1 │ ▼
49// └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐
50// └───►4 BB2 │ └───►1 BB2 │
51// └──5──┘ └──2──┘
52//
53// On the left are the initial per-block bundles, and on the right are the
54// joined bundles (which are the result of the EdgeBundles analysis).
55
56#include "AArch64InstrInfo.h"
58#include "AArch64Subtarget.h"
68
69using namespace llvm;
70
71#define DEBUG_TYPE "aarch64-machine-sme-abi"
72
73namespace {
74
/// The abstract ZA states tracked by this pass. A state transition (e.g.
/// saving or restoring ZA) is inserted wherever the current state does not
/// match the state required before an instruction.
enum ZAState {
  // Any/unknown state (not valid)
  ANY = 0,

  // ZA is in use and active (i.e. within the accumulator)
  ACTIVE,

  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
  LOCAL_SAVED,

  // ZA is off or a lazy save has been set up by the caller
  CALLER_DORMANT,

  // ZA is off
  OFF,

  // The number of ZA states (not a valid state)
  NUM_ZA_STATE
};
94
/// A bitmask enum to record live physical registers that the "emit*" routines
/// may need to preserve. Note: This only tracks registers we may clobber.
enum LiveRegs : uint8_t {
  None = 0,
  NZCV = 1 << 0,
  // W0 and its high half (W0_HI) are tracked separately so that X0 is only
  // preserved when both halves are live (see getPhysLiveRegs /
  // createPhysRegSave).
  W0 = 1 << 1,
  W0_HI = 1 << 2,
  X0 = W0 | W0_HI,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
};
105
/// Holds the virtual registers live physical registers have been saved to.
struct PhysRegSave {
  // Which physical registers were live (and therefore saved).
  LiveRegs PhysLiveRegs;
  // Virtual register holding the saved NZCV flags (read via MRS), if live.
  Register StatusFlags = AArch64::NoRegister;
  // Virtual register holding the saved W0 (or X0 when W0_HI is also live).
  Register X0Save = AArch64::NoRegister;
};
112
/// Contains the needed ZA state (and live registers) at an instruction. That is
/// the state ZA must be in _before_ "InsertPt".
struct InstInfo {
  ZAState NeededState{ZAState::ANY};
  // NOTE(review): this excerpt is missing the "InsertPt" member (a
  // MachineBasicBlock::iterator) that other code reads, e.g.
  // findStateChangeInsertionPoint uses Inst->InsertPt — confirm against the
  // full source.
  LiveRegs PhysLiveRegs = LiveRegs::None;
};
120
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
  // NOTE(review): this excerpt is missing the "Insts" member (a vector of
  // InstInfo) used elsewhere, e.g. Block.Insts.push_back in
  // collectNeededZAStates — confirm against the full source.
  // State ZA is known to be in on entry (e.g. CALLER_DORMANT for the entry
  // block of a private-ZA function); ANY if the entry state is not fixed.
  ZAState FixedEntryState{ZAState::ANY};
  // Entry/exit states that would avoid a transition for this block's
  // first/last recorded instruction.
  ZAState DesiredIncomingState{ZAState::ANY};
  ZAState DesiredOutgoingState{ZAState::ANY};
  LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
  LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
131
/// Contains the needed ZA state information for all blocks within a function.
struct FunctionInfo {
  // NOTE(review): this excerpt is missing the "Blocks" member (per-block
  // BlockInfo) that collectNeededZAStates returns via
  // FunctionInfo{std::move(Blocks), ...} — confirm against the full source.
  // Point just after the SMEStateAllocPseudo marker (if present): a safe spot
  // to insert TPIDR2 block / save-buffer setup.
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
};
138
139/// State/helpers that is only needed when emitting code to handle
140/// saving/restoring ZA.
141class EmitContext {
142public:
143 EmitContext() = default;
144
145 /// Get or create a TPIDR2 block in \p MF.
146 int getTPIDR2Block(MachineFunction &MF) {
147 if (TPIDR2BlockFI)
148 return *TPIDR2BlockFI;
149 MachineFrameInfo &MFI = MF.getFrameInfo();
150 TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false);
151 return *TPIDR2BlockFI;
152 }
153
154 /// Get or create agnostic ZA buffer pointer in \p MF.
155 Register getAgnosticZABufferPtr(MachineFunction &MF) {
156 if (AgnosticZABufferPtr != AArch64::NoRegister)
157 return AgnosticZABufferPtr;
158 Register BufferPtr =
159 MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
160 AgnosticZABufferPtr =
161 BufferPtr != AArch64::NoRegister
162 ? BufferPtr
163 : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
164 return AgnosticZABufferPtr;
165 }
166
167 /// Returns true if the function must allocate a ZA save buffer on entry. This
168 /// will be the case if, at any point in the function, a ZA save was emitted.
169 bool needsSaveBuffer() const {
170 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
171 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
172 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
173 }
174
175private:
176 std::optional<int> TPIDR2BlockFI;
177 Register AgnosticZABufferPtr = AArch64::NoRegister;
178};
179
180/// Checks if \p State is a legal edge bundle state. For a state to be a legal
181/// bundle state, it must be possible to transition from it to any other bundle
182/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
183/// as you can transition between those states by saving/restoring ZA. The OFF
184/// state would not be legal, as transitioning to it drops the content of ZA.
185static bool isLegalEdgeBundleZAState(ZAState State) {
186 switch (State) {
187 case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
188 case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
189 return true;
190 default:
191 return false;
192 }
193}
194
195StringRef getZAStateString(ZAState State) {
196#define MAKE_CASE(V) \
197 case V: \
198 return #V;
199 switch (State) {
200 MAKE_CASE(ZAState::ANY)
201 MAKE_CASE(ZAState::ACTIVE)
202 MAKE_CASE(ZAState::LOCAL_SAVED)
203 MAKE_CASE(ZAState::CALLER_DORMANT)
204 MAKE_CASE(ZAState::OFF)
205 default:
206 llvm_unreachable("Unexpected ZAState");
207 }
208#undef MAKE_CASE
209}
210
211static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
212 const MachineOperand &MO) {
213 if (!MO.isReg() || !MO.getReg().isPhysical())
214 return false;
215 return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
216 return AArch64::MPR128RegClass.contains(SR) ||
217 AArch64::ZTRRegClass.contains(SR);
218 });
219}
220
/// Returns the required ZA state needed before \p MI and an iterator pointing
/// to where any code required to change the ZA state should be inserted.
static std::pair<ZAState, MachineBasicBlock::iterator>
getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
                     bool ZAOffAtReturn) {
  // NOTE(review): this excerpt is missing the declaration of "InsertPt"
  // (presumably an iterator at MI) — confirm against the full source.

  if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
    return {ZAState::ACTIVE, std::prev(InsertPt)};

  if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
    return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};

  // Returns need ZA off for private-ZA functions (ZAOffAtReturn), otherwise
  // ZA must be active on return.
  if (MI.isReturn())
    return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};

  // Any direct use/def of a ZA or ZT0 register requires ZA to be active.
  for (auto &MO : MI.operands()) {
    if (isZAorZTRegOp(TRI, MO))
      return {ZAState::ACTIVE, InsertPt};
  }

  return {ZAState::ANY, InsertPt};
}
244
/// Machine pass implementing the SME ABI for ZA state (lazy/agnostic saves).
// NOTE(review): several member-function parameter lists below are truncated in
// this excerpt (missing MachineBasicBlock::iterator parameters, required
// analyses in getAnalysisUsage, and the OptLevel member) — confirm against the
// full source.
struct MachineSMEABI : public MachineFunctionPass {
  inline static char ID = 0;

  MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
      : MachineFunctionPass(ID), OptLevel(OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "Machine SME ABI pass"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  /// Collects the needed ZA state (and live registers) before each instruction
  /// within the machine function.
  FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);

  /// Assigns each edge bundle a ZA state based on the needed states of blocks
  /// that have incoming or outgoing edges in that bundle.
  SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
                                            const FunctionInfo &FnInfo);

  /// Inserts code to handle changes between ZA states within the function.
  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
  void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
                          const EdgeBundles &Bundles,
                          ArrayRef<ZAState> BundleStates);

  /// Propagates desired states forwards (from predecessors -> successors) if
  /// \p Forwards, otherwise, propagates backwards (from successors ->
  /// predecessors).
  void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);

  // Emission routines for private and shared ZA functions (using lazy saves).
  void emitNewZAPrologue(MachineBasicBlock &MBB,
  void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
  void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                  bool ClearTPIDR2);

  // Emission routines for agnostic ZA functions.
  void emitSetupFullZASave(MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  // Emit a "full" ZA save or restore. It is "full" in the sense that this
  // function will emit a call to __arm_sme_save or __arm_sme_restore, which
  // handles saving and restoring both ZA and ZT0.
  void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
                             LiveRegs PhysLiveRegs, bool IsSave);
  void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                    LiveRegs PhysLiveRegs);

  /// Attempts to find an insertion point before \p Inst where the status flags
  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
  /// the block is found.
  std::pair<MachineBasicBlock::iterator, LiveRegs>
  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
  void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, ZAState From,
                       ZAState To, LiveRegs PhysLiveRegs);

  // Helpers for switching between lazy/full ZA save/restore routines.
  void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/true);
    return emitSetupLazySave(Context, MBB, MBBI);
  }
  void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/false);
    return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
  }
  void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
                                LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
    return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
  }

  /// Save live physical registers to virtual registers.
  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
  /// Restore physical registers from a save of their previous values.
  void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,

private:
  // Cached per-function state, initialized in runOnMachineFunction.
  MachineFunction *MF = nullptr;
  const AArch64Subtarget *Subtarget = nullptr;
  const AArch64RegisterInfo *TRI = nullptr;
  const AArch64FunctionInfo *AFI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
};
361
362static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
363 LiveRegs PhysLiveRegs = LiveRegs::None;
364 if (!LiveUnits.available(AArch64::NZCV))
365 PhysLiveRegs |= LiveRegs::NZCV;
366 // We have to track W0 and X0 separately as otherwise things can get
367 // confused if we attempt to preserve X0 but only W0 was defined.
368 if (!LiveUnits.available(AArch64::W0))
369 PhysLiveRegs |= LiveRegs::W0;
370 if (!LiveUnits.available(AArch64::W0_HI))
371 PhysLiveRegs |= LiveRegs::W0_HI;
372 return PhysLiveRegs;
373}
374
375static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
376 if (PhysLiveRegs & LiveRegs::NZCV)
377 LiveUnits.addReg(AArch64::NZCV);
378 if (PhysLiveRegs & LiveRegs::W0)
379 LiveUnits.addReg(AArch64::W0);
380 if (PhysLiveRegs & LiveRegs::W0_HI)
381 LiveUnits.addReg(AArch64::W0_HI);
382}
383
384[[maybe_unused]] bool isCallStartOpcode(unsigned Opc) {
385 switch (Opc) {
386 case AArch64::TLSDESC_CALLSEQ:
387 case AArch64::TLSDESC_AUTH_CALLSEQ:
388 case AArch64::ADJCALLSTACKDOWN:
389 return true;
390 default:
391 return false;
392 }
393}
394
/// Collects, for every block, the ZA state needed before each instruction and
/// the tracked physical-register liveness at block entry/exit.
// NOTE(review): this excerpt is missing the declarations of "Blocks" (the
// per-block BlockInfo vector moved into the returned FunctionInfo) and "MBBI"
// (an iterator at MI inside the reverse loop) — confirm against the full
// source.
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
  assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
          SMEFnAttrs.hasZAState()) &&
         "Expected function to have ZA/ZT0 state!");

  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;

  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo &Block = Blocks[MBB.getNumber()];

    if (MBB.isEntryBlock()) {
      // Entry block: private-ZA functions enter with a (possible) caller lazy
      // save active; otherwise ZA is active on entry.
      Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface()
                                  ? ZAState::CALLER_DORMANT
                                  : ZAState::ACTIVE;
    } else if (MBB.isEHPad()) {
      // EH entry block:
      Block.FixedEntryState = ZAState::LOCAL_SAVED;
    }

    // Walk the block backwards tracking liveness of NZCV/W0/W0_HI.
    LiveRegUnits LiveUnits(*TRI);
    LiveUnits.addLiveOuts(MBB);

    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
    auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
    auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
    for (MachineInstr &MI : reverse(MBB)) {
      LiveUnits.stepBackward(MI);
      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
      // The SMEStateAllocPseudo marker is added to a function if the save
      // buffer was allocated in SelectionDAG. It marks the end of the
      // allocation -- which is a safe point for this pass to insert any TPIDR2
      // block setup.
      if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
        AfterSMEProloguePt = MBBI;
        PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
      }
      // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
      auto [NeededState, InsertPt] = getZAStateBeforeInst(
          *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
      assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) &&
             "Unexpected state change insertion point!");
      // TODO: Do something to avoid state changes where NZCV is live.
      if (MBBI == FirstTerminatorInsertPt)
        Block.PhysLiveRegsAtExit = PhysLiveRegs;
      if (MBBI == FirstNonPhiInsertPt)
        Block.PhysLiveRegsAtEntry = PhysLiveRegs;
      if (NeededState != ZAState::ANY)
        Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
    }

    // Reverse vector (as we had to iterate backwards for liveness).
    std::reverse(Block.Insts.begin(), Block.Insts.end());

    // Record the desired states on entry/exit of this block. These are the
    // states that would not incur a state transition.
    if (!Block.Insts.empty()) {
      Block.DesiredIncomingState = Block.Insts.front().NeededState;
      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
    }
  }

  return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
                      PhysLiveRegsAfterSMEPrologue};
}
463
/// Worklist propagation of desired ZA states across the CFG, replacing
/// illegal-bundle-state entries (e.g. ANY/OFF) with the majority legal state
/// of neighbouring blocks.
// NOTE(review): this excerpt is missing the declaration of "Worklist"
// (presumably a SmallVector<MachineBasicBlock *>) — confirm against the full
// source.
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
                                           bool Forwards) {
  // If `Forwards`, this propagates desired states from predecessors to
  // successors, otherwise, this propagates states from successors to
  // predecessors.
  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
  };

  // Seed the worklist with every block whose relevant state is not a legal
  // edge-bundle state.
  for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
      Worklist.push_back(MF->getBlockNumbered(BlockID));
  }

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];

    // Pick a legal edge bundle state that matches the majority of
    // predecessors/successors.
    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (MachineBasicBlock *PredOrSucc :
         Forwards ? predecessors(MBB) : successors(MBB)) {
      BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
      ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
      if (isLegalEdgeBundleZAState(ZAState))
        StateCounts[ZAState]++;
    }

    // Ties pick the lowest-numbered state (max_element returns the first max).
    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
    ZAState &CurrentState = GetBlockState(Block, Forwards);
    if (PropagatedState != CurrentState) {
      CurrentState = PropagatedState;
      ZAState &OtherState = GetBlockState(Block, !Forwards);
      // Propagate to the incoming/outgoing state if that is also "ANY".
      if (OtherState == ZAState::ANY)
        OtherState = PropagatedState;
      // Push any successors/predecessors that may need updating to the
      // worklist.
      for (MachineBasicBlock *SuccOrPred :
           Forwards ? successors(MBB) : predecessors(MBB)) {
        BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
        if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
          Worklist.push_back(SuccOrPred);
      }
    }
  }
}
513
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
// NOTE(review): the return-type line (SmallVector<ZAState>, per the in-class
// declaration) is missing from this excerpt — confirm against the full source.
MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
                                    const FunctionInfo &FnInfo) {
  SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
  for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');

    // Attempt to assign a ZA state for this bundle that minimizes state
    // transitions. Edges within loops are given a higher weight as we assume
    // they will be executed more than once.
    int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (unsigned BlockID : Bundles.getBlocks(I)) {
      LLVM_DEBUG(dbgs() << "- bb." << BlockID);

      const BlockInfo &Block = FnInfo.Blocks[BlockID];
      // Determine which side(s) of this block belong to bundle I.
      bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
      bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;

      // Only legal bundle states (ACTIVE/LOCAL_SAVED) are allowed to vote.
      bool LegalInEdge =
          InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
      bool LegalOutEgde =
          OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
      if (LegalInEdge) {
        LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
                          << getZAStateString(Block.DesiredIncomingState));
        EdgeStateCounts[Block.DesiredIncomingState]++;
      }
      if (LegalOutEgde) {
        LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
                          << getZAStateString(Block.DesiredOutgoingState));
        EdgeStateCounts[Block.DesiredOutgoingState]++;
      }
      if (!LegalInEdge && !LegalOutEgde)
        LLVM_DEBUG(dbgs() << " (no state preference)");
      LLVM_DEBUG(dbgs() << '\n');
    }

    ZAState BundleState =
        ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);

    // With no votes at all, fall back to ACTIVE (ANY is not a valid state).
    if (BundleState == ZAState::ANY)
      BundleState = ZAState::ACTIVE;

    LLVM_DEBUG({
      dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
             << "Edge counts:";
      for (auto [State, Count] : enumerate(EdgeStateCounts))
        dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
      dbgs() << "\n\n";
    });

    BundleStates[I] = BundleState;
  }

  return BundleStates;
}
572
/// Finds an insertion point for a state change before \p Inst (or before the
/// block end if Inst == Block.Insts.end()), hoisting the point earlier when
/// NZCV is live so flags are not clobbered.
// NOTE(review): this excerpt is missing the final parameter ("Inst", an
// iterator/pointer into Block.Insts) and the declaration of "InsertPt"
// (a MachineBasicBlock::iterator) — confirm against the full source.
std::pair<MachineBasicBlock::iterator, LiveRegs>
MachineSMEABI::findStateChangeInsertionPoint(
    MachineBasicBlock &MBB, const BlockInfo &Block,
  LiveRegs PhysLiveRegs;
  if (Inst != Block.Insts.end()) {
    InsertPt = Inst->InsertPt;
    PhysLiveRegs = Inst->PhysLiveRegs;
  } else {
    InsertPt = MBB.getFirstTerminator();
    PhysLiveRegs = Block.PhysLiveRegsAtExit;
  }

  if (!(PhysLiveRegs & LiveRegs::NZCV))
    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).

  // Find the previous state change. We can not move before this point.
  MachineBasicBlock::iterator PrevStateChangeI;
  if (Inst == Block.Insts.begin()) {
    PrevStateChangeI = MBB.begin();
  } else {
    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
    // InstInfo object for instructions that require a specific ZA state, so the
    // InstInfo is the site of the previous state change in the block (which can
    // be several MIs earlier).
    PrevStateChangeI = std::prev(Inst)->InsertPt;
  }

  // Note: LiveUnits will only accurately track X0 and NZCV.
  LiveRegUnits LiveUnits(*TRI);
  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
  // Walk backwards from the default point looking for a spot where NZCV is
  // dead.
  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
    // Don't move before/into a call (which may have a state change before it).
    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
      break;
    LiveUnits.stepBackward(*I);
    if (LiveUnits.available(AArch64::NZCV))
      return {I, getPhysLiveRegs(LiveUnits)};
  }
  return {InsertPt, PhysLiveRegs};
}
615
616void MachineSMEABI::insertStateChanges(EmitContext &Context,
617 const FunctionInfo &FnInfo,
618 const EdgeBundles &Bundles,
619 ArrayRef<ZAState> BundleStates) {
620 for (MachineBasicBlock &MBB : *MF) {
621 const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
622 ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
623 /*Out=*/false)];
624
625 ZAState CurrentState = Block.FixedEntryState;
626 if (CurrentState == ZAState::ANY)
627 CurrentState = InState;
628
629 for (auto &Inst : Block.Insts) {
630 if (CurrentState != Inst.NeededState) {
631 auto [InsertPt, PhysLiveRegs] =
632 findStateChangeInsertionPoint(MBB, Block, &Inst);
633 emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
634 PhysLiveRegs);
635 CurrentState = Inst.NeededState;
636 }
637 }
638
639 if (MBB.succ_empty())
640 continue;
641
642 ZAState OutState =
643 BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
644 if (CurrentState != OutState) {
645 auto [InsertPt, PhysLiveRegs] =
646 findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
647 emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
648 PhysLiveRegs);
649 }
650 }
651}
652
655 if (MBBI != MBB.end())
656 return MBBI->getDebugLoc();
657 return DebugLoc();
658}
659
/// Sets up a lazy save: points TPIDR2_EL0 at this function's TPIDR2 block.
// NOTE(review): the remaining parameters (the insertion iterator "MBBI") and
// the DebugLoc "DL" used below are missing from this excerpt — confirm against
// the full source.
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,

  // Get pointer to TPIDR2 block.
  Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
  Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
      .addReg(TPIDR2);
  // Set TPIDR2_EL0 to point to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(TPIDR2Ptr);
}
679
/// Saves the live tracked physical registers (NZCV and W0/X0) into virtual
/// registers so the emit* routines may clobber them.
// NOTE(review): the MBB/MBBI parameters are missing from this excerpt —
// confirm against the full source.
PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
                                             DebugLoc DL) {
  PhysRegSave RegSave{PhysLiveRegs};
  // Save NZCV via MRS if the flags are live.
  if (PhysLiveRegs & LiveRegs::NZCV) {
    RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit);
  }
  // Note: Preserving X0 is "free" as this is before register allocation, so
  // the register allocator is still able to optimize these copies.
  if (PhysLiveRegs & LiveRegs::W0) {
    // Save all 64 bits only when the high half of X0 is also live.
    RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
                                                    ? &AArch64::GPR64RegClass
                                                    : &AArch64::GPR32RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
        .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
  }
  return RegSave;
}
702
/// Restores the physical registers previously saved by createPhysRegSave.
// NOTE(review): the MBB/MBBI parameters are missing from this excerpt —
// confirm against the full source.
void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
                                      DebugLoc DL) {
  // Restore NZCV from the saved status-flags register (if saved).
  if (RegSave.StatusFlags != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(RegSave.StatusFlags)
        .addReg(AArch64::NZCV, RegState::ImplicitDefine);

  // Restore W0 or X0, matching the width that was saved.
  if (RegSave.X0Save != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
            RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
        .addReg(RegSave.X0Save);
}
718
/// Emits the lazy-save restore sequence: enable ZA, conditionally restore ZA
/// from the TPIDR2 block via __arm_tpidr2_restore, then zero TPIDR2_EL0.
// NOTE(review): the insertion-iterator parameter and the DebugLoc "DL" used
// below are missing from this excerpt — confirm against the full source.
void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
                                        LiveRegs PhysLiveRegs) {
  auto *TLI = Subtarget->getTargetLowering();
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  // The TPIDR2 block pointer is passed in X0 (see the ...FromX0 regmask below).
  Register TPIDR2 = AArch64::X0;

  // TODO: Emit these within the restore MBB to prevent unnecessary saves.
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Enable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
  // Get current TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // Get pointer to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // (Conditionally) restore ZA state.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
      .addReg(TPIDR2EL0)
      .addReg(TPIDR2)
      .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE))
      .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  // Zero TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
756
/// Turns ZA off, optionally zeroing TPIDR2_EL0 first (to cancel a pending
/// lazy save).
// NOTE(review): the insertion-iterator parameter and the DebugLoc "DL" used
// below are missing from this excerpt — confirm against the full source.
void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
                              bool ClearTPIDR2) {

  if (ClearTPIDR2)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::TPIDR2_EL0)
        .addReg(AArch64::XZR);

  // Disable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(0);
}
772
/// Allocates the SVL x SVL lazy-save buffer on the stack (unless already
/// allocated in SelectionDAG) and initializes the TPIDR2 block with the buffer
/// pointer and number of save slices.
// NOTE(review): the insertion-iterator parameter, the DebugLoc "DL", and the
// report_fatal_error(...) call preceding the big-endian message string are
// missing from this excerpt — confirm against the full source.
void MachineSMEABI::emitAllocateLazySaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
  MachineFrameInfo &MFI = MF->getFrameInfo();
  Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register Buffer = AFI->getEarlyAllocSMESaveBuffer();

  // Calculate SVL.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);

  // 1. Allocate the lazy save buffer.
  if (Buffer == AArch64::NoRegister) {
    // TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
    // Buffer != AArch64::NoRegister). This is done to reuse the existing
    // expansions (which can insert stack checks). This works, but it means we
    // will always allocate the lazy save buffer (even if the function contains
    // no lazy saves). If we want to handle Windows here, we'll need to
    // implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
    assert(!Subtarget->isTargetWindows() &&
           "Lazy ZA save is not yet supported on Windows");
    Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    // Get original stack pointer.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
        .addReg(AArch64::SP);
    // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
        .addReg(SVL)
        .addReg(SVL)
        .addReg(SP);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
        .addReg(Buffer);
    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  // 2. Setup the TPIDR2 block.
  {
    // Note: This case just needs to do `SVL << 48`. It is not implemented as we
    // generally don't support big-endian SVE/SME.
    if (!Subtarget->isLittleEndian())
          "TPIDR2 block initialization is not supported on big-endian targets");

    // Store buffer pointer and num_za_save_slices.
    // Bytes 10-15 are implicitly zeroed.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
        .addReg(Buffer)
        .addReg(SVL)
        .addFrameIndex(Context.getTPIDR2Block(*MF))
        .addImm(0);
  }
}
827
/// Emits the prologue for a "new ZA" function: commits any pending caller lazy
/// save (zeroing ZA if this function has ZA state), then enables ZA.
// NOTE(review): the insertion-iterator parameter and the DebugLoc "DL" used
// below are missing from this excerpt — confirm against the full source.
void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
  auto *TLI = Subtarget->getTargetLowering();

  // Get current TPIDR2_EL0.
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS))
      .addReg(TPIDR2EL0, RegState::Define)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // If TPIDR2_EL0 is non-zero, commit the lazy save.
  // NOTE: Functions that only use ZT0 don't need to zero ZA.
  bool ZeroZA = AFI->getSMEFnAttrs().hasZAState();
  auto CommitZASave =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo))
          .addReg(TPIDR2EL0)
          .addImm(ZeroZA ? 1 : 0)
          .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
          .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  if (ZeroZA)
    CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
  // Enable ZA (as ZA could have previously been in the OFF state).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
}
854
/// Emits a "full" (agnostic-ZA) save or restore via a call to __arm_sme_save
/// or __arm_sme_restore, passing the buffer pointer in X0.
// NOTE(review): the insertion-iterator parameter, the DebugLoc "DL", and the
// calling-convention argument of getCallPreservedMask are missing from this
// excerpt — confirm against the full source.
void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
                                          LiveRegs PhysLiveRegs, bool IsSave) {
  auto *TLI = Subtarget->getTargetLowering();
  Register BufferPtr = AArch64::X0;

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Copy the buffer pointer into X0.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
      .addReg(Context.getAgnosticZABufferPtr(*MF));

  // Call __arm_sme_save/__arm_sme_restore.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
      .addReg(BufferPtr, RegState::Implicit)
      .addExternalSymbol(TLI->getLibcallName(
          IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
      .addRegMask(TRI->getCallPreservedMask(
          *MF,

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
880
/// Allocates the agnostic-ZA save buffer on the stack, sized by a call to
/// __arm_sme_state_size.
// NOTE(review): several lines are missing from this excerpt: the remaining
// parameters, the early-return condition under "Buffer already allocated in
// SelectionDAG", the DebugLoc "DL", the calling-convention name passed to
// getCallPreservedMask, and the extend operand of the SUBXrx64 instruction —
// confirm against the full source.
void MachineSMEABI::emitAllocateFullZASaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
  // Buffer already allocated in SelectionDAG.
    return;

  Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
  Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Calculate the SME state size.
  {
    auto *TLI = Subtarget->getTargetLowering();
    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_SME_STATE_SIZE))
        .addReg(AArch64::X0, RegState::ImplicitDefine)
        .addRegMask(TRI->getCallPreservedMask(
            *MF, CallingConv::
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
        .addReg(AArch64::X0);
  }

  // Allocate a buffer object of the size given __arm_sme_state_size.
  {
    MachineFrameInfo &MFI = MF->getFrameInfo();
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP)
        .addReg(BufferSize)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
        .addReg(AArch64::SP);

    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
924
925void MachineSMEABI::emitStateChange(EmitContext &Context,
928 ZAState From, ZAState To,
929 LiveRegs PhysLiveRegs) {
930 // ZA not used.
931 if (From == ZAState::ANY || To == ZAState::ANY)
932 return;
933
934 // If we're exiting from the CALLER_DORMANT state that means this new ZA
935 // function did not touch ZA (so ZA was never turned on).
936 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
937 return;
938
939 // TODO: Avoid setting up the save buffer if there's no transition to
940 // LOCAL_SAVED.
941 if (From == ZAState::CALLER_DORMANT) {
943 "CALLER_DORMANT state requires private ZA interface");
944 assert(&MBB == &MBB.getParent()->front() &&
945 "CALLER_DORMANT state only valid in entry block");
946 emitNewZAPrologue(MBB, MBB.getFirstNonPHI());
947 if (To == ZAState::ACTIVE)
948 return; // Nothing more to do (ZA is active after the prologue).
949
950 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
951 // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
952 // case by changing the placement of the zero instruction.
953 From = ZAState::ACTIVE;
954 }
955
956 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
957 emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
958 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
959 emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
960 else if (To == ZAState::OFF) {
961 assert(From != ZAState::CALLER_DORMANT &&
962 "CALLER_DORMANT to OFF should have already been handled");
964 "Should not turn ZA off in agnostic ZA function");
965 emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
966 } else {
967 dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
968 << getZAStateString(To) << '\n';
969 llvm_unreachable("Unimplemented state transition");
970 }
971}
972
973} // end anonymous namespace
974
975INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
976 false, false)
977
978bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
979 if (!MF.getSubtarget<AArch64Subtarget>().hasSME())
980 return false;
981
982 AFI = MF.getInfo<AArch64FunctionInfo>();
983 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
984 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
985 !SMEFnAttrs.hasAgnosticZAInterface())
986 return false;
987
988 assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
989
990 this->MF = &MF;
991 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
992 TII = Subtarget->getInstrInfo();
993 TRI = Subtarget->getRegisterInfo();
994 MRI = &MF.getRegInfo();
995
996 const EdgeBundles &Bundles =
997 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
998
999 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
1000
1001 if (OptLevel != CodeGenOptLevel::None) {
1002 // Propagate desired states forward, then backwards. Most of the propagation
1003 // should be done in the forward step, and backwards propagation is then
1004 // used to fill in the gaps. Note: Doing both in one step can give poor
1005 // results. For example, consider this subgraph:
1006 //
1007 // ┌─────┐
1008 // ┌─┤ BB0 ◄───┐
1009 // │ └─┬───┘ │
1010 // │ ┌─▼───◄──┐│
1011 // │ │ BB1 │ ││
1012 // │ └─┬┬──┘ ││
1013 // │ │└─────┘│
1014 // │ ┌─▼───┐ │
1015 // │ │ BB2 ├───┘
1016 // │ └─┬───┘
1017 // │ ┌─▼───┐
1018 // └─► BB3 │
1019 // └─────┘
1020 //
1021 // If:
1022 // - "BB0" and "BB2" (outer loop) has no state preference
1023 // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
1024 // - "BB3" desires the LOCAL_SAVED state on entry
1025 //
1026 // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
1027 // then from BB2 to BB0. Which results in the inner and outer loops having
1028 // the "ACTIVE" state. This avoids any state changes in the loops.
1029 //
1030 // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
1031 // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
1032 // in the outer loop.
1033 for (bool Forwards : {true, false})
1034 propagateDesiredStates(FnInfo, Forwards);
1035 }
1036
1037 SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
1038
1039 EmitContext Context;
1040 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
1041
1042 if (Context.needsSaveBuffer()) {
1043 if (FnInfo.AfterSMEProloguePt) {
1044 // Note: With inline stack probes the AfterSMEProloguePt may not be in the
1045 // entry block (due to the probing loop).
1046 MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
1047 emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
1048 FnInfo.PhysLiveRegsAfterSMEPrologue);
1049 } else {
1050 MachineBasicBlock &EntryBlock = MF.front();
1051 emitAllocateZASaveBuffer(
1052 Context, EntryBlock, EntryBlock.getFirstNonPHI(),
1053 FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
1054 }
1055 }
1056
1057 return true;
1058}
1059
1061 return new MachineSMEABI(OptLevel);
1062}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define MAKE_CASE(V)
Register const TargetRegisterInfo * TRI
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A debug info location.
Definition DebugLoc.h:124
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition EdgeBundles.h:53
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
auto successors(const MachineBasicBlock *BB)
FunctionPass * createMachineSMEABIPass(CodeGenOptLevel)
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
@ Default
-O2, -Os
Definition CodeGen.h:85
@ LLVM_MARK_AS_BITMASK_ENUM
Definition ModRef.h:37
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2030
auto predecessors(const MachineBasicBlock *BB)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...