LLVM 23.0.0git
MachineSMEABIPass.cpp
Go to the documentation of this file.
1//===- MachineSMEABIPass.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the SME ABI requirements for ZA state. This includes
10// implementing the lazy (and agnostic) ZA state save schemes around calls.
11//
12//===----------------------------------------------------------------------===//
13//
14// This pass works by collecting instructions that require ZA to be in a
15// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16// transitions to ensure ZA is in the required state before instructions. State
17// transitions represent actions such as setting up or restoring a lazy save.
18// Certain points within a function may also have predefined states independent
19// of any instructions, for example, a "shared_za" function is always entered
20// and exited in the "ACTIVE" state.
21//
22// To handle ZA state across control flow, we make use of edge bundling. This
23// assigns each block an "incoming" and "outgoing" edge bundle (representing
24// incoming and outgoing edges). Initially, these are unique to each block;
25// then, in the process of forming bundles, the outgoing bundle of a block is
26// joined with the incoming bundle of all successors. The result is that each
27// bundle can be assigned a single ZA state, which ensures the state required by
28// all of a block's successors is the same, and that each basic block will always
29// be entered with the same ZA state. This eliminates the need for splitting
30// edges to insert state transitions or "phi" nodes for ZA states.
31//
32// See below for a simple example of edge bundling.
33//
34// The following shows a conditionally executed basic block (BB1):
35//
36// if (cond)
37// BB1
38// BB2
39//
40// Initial Bundles Joined Bundles
41//
42// ┌──0──┐ ┌──0──┐
43// │ BB0 │ │ BB0 │
44// └──1──┘ └──1──┘
45// ├───────┐ ├───────┐
46// ▼ │ ▼ │
47// ┌──2──┐ │ ─────► ┌──1──┐ │
48// │ BB1 │ ▼ │ BB1 │ ▼
49// └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐
50// └───►4 BB2 │ └───►1 BB2 │
51// └──5──┘ └──2──┘
52//
53// On the left are the initial per-block bundles, and on the right are the
54// joined bundles (which are the result of the EdgeBundles analysis).
55
56#include "AArch64InstrInfo.h"
58#include "AArch64Subtarget.h"
69
70using namespace llvm;
71
72#define DEBUG_TYPE "aarch64-machine-sme-abi"
73
74namespace {
75
// Note: For agnostic ZA, we assume the function is always entered/exited in the
// "ACTIVE" state -- this _may_ not be the case (since OFF is also a
// possibility, but for the purpose of placing ZA saves/restores, that does not
// matter).
//
// Note: the enumerator values are used as indices into per-state count arrays
// (see StateCounts/EdgeStateCounts below), so ANY must be 0 and NUM_ZA_STATE
// must remain last.
enum ZAState : uint8_t {
  // Any/unknown state (not valid)
  ANY = 0,

  // ZA is in use and active (i.e. within the accumulator)
  ACTIVE,

  // ZA is active, but ZT0 has been saved.
  // This handles the edge case of sharedZA && !sharesZT0.
  ACTIVE_ZT0_SAVED,

  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
  // If the function uses ZT0 it must also be saved.
  LOCAL_SAVED,

  // ZA has been committed to the lazy save buffer of the current function.
  // If the function uses ZT0 it must also be saved.
  // ZA is off.
  LOCAL_COMMITTED,

  // The ZA/ZT0 state on entry to the function.
  ENTRY,

  // ZA is off.
  OFF,

  // The number of ZA states (not a valid state)
  NUM_ZA_STATE
};
109
/// A bitmask enum to record live physical registers that the "emit*" routines
/// may need to preserve. Note: This only tracks registers we may clobber.
enum LiveRegs : uint8_t {
  None = 0,
  NZCV = 1 << 0,
  // W0 (low 32 bits) and W0_HI (high 32 bits of X0) are tracked as separate
  // bits; X0 is their union. This lets liveness distinguish "only W0 live"
  // from "all of X0 live" (see getPhysLiveRegs/createPhysRegSave).
  W0 = 1 << 1,
  W0_HI = 1 << 2,
  X0 = W0 | W0_HI,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
};
120
/// Holds the virtual registers live physical registers have been saved to.
struct PhysRegSave {
  // Which physical registers were live (and thus saved).
  LiveRegs PhysLiveRegs;
  // Virtual register holding the saved NZCV flags (via MRS), if any.
  Register StatusFlags = AArch64::NoRegister;
  // Virtual register holding the saved W0/X0 value, if any.
  Register X0Save = AArch64::NoRegister;
};
127
/// Contains the needed ZA state (and live registers) at an instruction. That is
/// the state ZA must be in _before_ "InsertPt".
struct InstInfo {
  // State ZA must be in before this instruction (ANY = no constraint).
  ZAState NeededState{ZAState::ANY};
  // NOTE(review): an "InsertPt" iterator member appears to have been dropped
  // by extraction -- other code reads Inst->InsertPt (see
  // findStateChangeInsertionPoint); confirm against the upstream source.
  // Physical registers (NZCV/W0/W0_HI) live at this point.
  LiveRegs PhysLiveRegs = LiveRegs::None;
};
135
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
  // NOTE(review): the "Insts" vector member (of InstInfo, referenced as
  // Block.Insts elsewhere in this file) appears to have been dropped by
  // extraction -- confirm against the upstream source.
  // Predefined state on entry (e.g. ENTRY for the entry block, or
  // LOCAL_COMMITTED for EH pads); ANY if the entry state is free.
  ZAState FixedEntryState{ZAState::ANY};
  // States that would avoid a transition on block entry/exit.
  ZAState DesiredIncomingState{ZAState::ANY};
  ZAState DesiredOutgoingState{ZAState::ANY};
  LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
  LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
146
/// Contains the needed ZA state information for all blocks within a function.
struct FunctionInfo {
  // NOTE(review): the "Blocks" vector member (per-block BlockInfo, indexed by
  // MBB number, referenced as FnInfo.Blocks elsewhere) appears to have been
  // dropped by extraction -- confirm against the upstream source.
  // Point just after the SMEStateAllocPseudo marker (if present): a safe spot
  // to insert TPIDR2-block/buffer setup.
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
};
153
154/// State/helpers that is only needed when emitting code to handle
155/// saving/restoring ZA.
156class EmitContext {
157public:
158 EmitContext() = default;
159
160 /// Get or create a TPIDR2 block in \p MF.
161 int getTPIDR2Block(MachineFunction &MF) {
162 if (TPIDR2BlockFI)
163 return *TPIDR2BlockFI;
164 MachineFrameInfo &MFI = MF.getFrameInfo();
165 TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false);
166 return *TPIDR2BlockFI;
167 }
168
169 /// Get or create agnostic ZA buffer pointer in \p MF.
170 Register getAgnosticZABufferPtr(MachineFunction &MF) {
171 if (AgnosticZABufferPtr != AArch64::NoRegister)
172 return AgnosticZABufferPtr;
173 Register BufferPtr =
174 MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
175 AgnosticZABufferPtr =
176 BufferPtr != AArch64::NoRegister
177 ? BufferPtr
178 : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
179 return AgnosticZABufferPtr;
180 }
181
182 int getZT0SaveSlot(MachineFunction &MF) {
183 if (ZT0SaveFI)
184 return *ZT0SaveFI;
185 MachineFrameInfo &MFI = MF.getFrameInfo();
186 ZT0SaveFI = MFI.CreateSpillStackObject(64, Align(16));
187 return *ZT0SaveFI;
188 }
189
190 /// Returns true if the function must allocate a ZA save buffer on entry. This
191 /// will be the case if, at any point in the function, a ZA save was emitted.
192 bool needsSaveBuffer() const {
193 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
194 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
195 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
196 }
197
198private:
199 std::optional<int> ZT0SaveFI;
200 std::optional<int> TPIDR2BlockFI;
201 Register AgnosticZABufferPtr = AArch64::NoRegister;
202};
203
204/// Checks if \p State is a legal edge bundle state. For a state to be a legal
205/// bundle state, it must be possible to transition from it to any other bundle
206/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
207/// as you can transition between those states by saving/restoring ZA. The OFF
208/// state would not be legal, as transitioning to it drops the content of ZA.
209static bool isLegalEdgeBundleZAState(ZAState State) {
210 switch (State) {
211 case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
212 case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active).
213 case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack.
214 case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack.
215 return true;
216 default:
217 return false;
218 }
219}
220
221StringRef getZAStateString(ZAState State) {
222#define MAKE_CASE(V) \
223 case V: \
224 return #V;
225 switch (State) {
226 MAKE_CASE(ZAState::ANY)
227 MAKE_CASE(ZAState::ACTIVE)
228 MAKE_CASE(ZAState::ACTIVE_ZT0_SAVED)
229 MAKE_CASE(ZAState::LOCAL_SAVED)
230 MAKE_CASE(ZAState::LOCAL_COMMITTED)
231 MAKE_CASE(ZAState::ENTRY)
232 MAKE_CASE(ZAState::OFF)
233 default:
234 llvm_unreachable("Unexpected ZAState");
235 }
236#undef MAKE_CASE
237}
238
239static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
240 const MachineOperand &MO) {
241 if (!MO.isReg() || !MO.getReg().isPhysical())
242 return false;
243 return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
244 return AArch64::MPR128RegClass.contains(SR) ||
245 AArch64::ZTRRegClass.contains(SR);
246 });
247}
248
/// Returns the required ZA state needed before \p MI and an iterator pointing
/// to where any code required to change the ZA state should be inserted.
static std::pair<ZAState, MachineBasicBlock::iterator>
getInstNeededZAState(const TargetRegisterInfo &TRI, MachineInstr &MI,
                     SMEAttrs SMEFnAttrs) {
  // NOTE(review): the declaration of "InsertPt" (presumably MI's iterator)
  // was dropped by extraction -- confirm against the upstream source.

  // Note: InOutZAUsePseudo, RequiresZASavePseudo, and RequiresZT0SavePseudo are
  // intended to mark the position immediately before a call. Due to
  // SelectionDAG constraints, these markers occur after the ADJCALLSTACKDOWN,
  // so we use std::prev(InsertPt) to get the position before the call.

  // A shared-ZA call: ZA must be live (ACTIVE) before the call.
  if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
    return {ZAState::ACTIVE, std::prev(InsertPt)};

  // Note: If we need to save both ZA and ZT0 we use RequiresZASavePseudo.
  if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
    return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};

  // If we only need to save ZT0 there's two cases to consider:
  // 1. The function has ZA state (that we don't need to save).
  //    - In this case we switch to the "ACTIVE_ZT0_SAVED" state.
  //      This only saves ZT0.
  // 2. The function does not have ZA state
  //    - In this case we switch to "LOCAL_COMMITTED" state.
  //      This saves ZT0 and turns ZA off.
  if (MI.getOpcode() == AArch64::RequiresZT0SavePseudo) {
    return {SMEFnAttrs.hasZAState() ? ZAState::ACTIVE_ZT0_SAVED
                                    : ZAState::LOCAL_COMMITTED,
            std::prev(InsertPt)};
  }

  // Returns: private-ZA functions must exit with ZA off; otherwise ZA is
  // expected to be ACTIVE on return.
  if (MI.isReturn()) {
    bool ZAOffAtReturn = SMEFnAttrs.hasPrivateZAInterface();
    return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
  }

  // Any instruction that touches ZA/ZT0 registers requires ZA to be ACTIVE.
  for (auto &MO : MI.operands()) {
    if (isZAorZTRegOp(TRI, MO))
      return {ZAState::ACTIVE, InsertPt};
  }

  // No constraint on ZA state for this instruction.
  return {ZAState::ANY, InsertPt};
}
293
// NOTE(review): several declaration lines in this class appear to have been
// dropped by extraction (the analysis requirements in getAnalysisUsage, some
// parameter-list continuation lines on the emit* declarations, and a few
// member declarations such as OptLevel and the remark emitter "ORE" used by
// emitCallSaveRemarks). Restore from the upstream source before compiling.
struct MachineSMEABI : public MachineFunctionPass {
  inline static char ID = 0;

  MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
      : MachineFunctionPass(ID), OptLevel(OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "Machine SME ABI pass"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  /// Collects the needed ZA state (and live registers) before each instruction
  /// within the machine function.
  FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);

  /// Assigns each edge bundle a ZA state based on the needed states of blocks
  /// that have incoming or outgoing edges in that bundle.
  SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
                                            const FunctionInfo &FnInfo);

  /// Inserts code to handle changes between ZA states within the function.
  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
  void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
                          const EdgeBundles &Bundles,
                          ArrayRef<ZAState> BundleStates);

  /// Propagates desired states forwards (from predecessors -> successors) if
  /// \p Forwards, otherwise, propagates backwards (from successors ->
  /// predecessors).
  void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);

  /// Appends the callee symbol and call-preserved register mask for the SME
  /// runtime routine \p LC to \p MIB (see the out-of-line definition below).
  void addSMELibCall(MachineInstrBuilder &MIB, RTLIB::Libcall LC,
                     CallingConv::ID ExpectedCC);

  /// Emits a save (\p IsSave) or restore of ZT0 to/from its spill slot.
  void emitZT0SaveRestore(EmitContext &, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsSave);

  // Emission routines for private and shared ZA functions (using lazy saves).
  void emitSMEPrologue(MachineBasicBlock &MBB,
  void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
  void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                  bool ClearTPIDR2, bool On);

  // Emission routines for agnostic ZA functions.
  void emitSetupFullZASave(MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  // Emit a "full" ZA save or restore. It is "full" in the sense that this
  // function will emit a call to __arm_sme_save or __arm_sme_restore, which
  // handles saving and restoring both ZA and ZT0.
  void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
                             LiveRegs PhysLiveRegs, bool IsSave);
  void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                    LiveRegs PhysLiveRegs);

  /// Attempts to find an insertion point before \p Inst where the status flags
  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
  /// the block is found.
  std::pair<MachineBasicBlock::iterator, LiveRegs>
  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
  void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, ZAState From,
                       ZAState To, LiveRegs PhysLiveRegs);

  // Helpers for switching between lazy/full ZA save/restore routines.
  // Agnostic-ZA functions use the "full" (__arm_sme_save/restore) scheme;
  // all others use the lazy-save (TPIDR2) scheme.
  void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/true);
    return emitSetupLazySave(Context, MBB, MBBI);
  }
  void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/false);
    return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
  }
  void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
                                LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
    return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
  }

  /// Collects the reachable calls from \p MBBI marked with \p Marker. This is
  /// intended to be used to emit lazy save remarks. Note: This stops at the
  /// first marked call along any path.
  void collectReachableMarkedCalls(const MachineBasicBlock &MBB,
                                   unsigned Marker) const;

  void emitCallSaveRemarks(const MachineBasicBlock &MBB,
                           unsigned Marker, StringRef RemarkName,
                           StringRef SaveName) const;

  /// Reports a fatal lowering error, prefixed with the function name.
  void emitError(const Twine &Message) {
    LLVMContext &Context = MF->getFunction().getContext();
    Context.emitError(MF->getName() + ": " + Message);
  }

  /// Save live physical registers to virtual registers.
  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
  /// Restore physical registers from a save of their previous values.
  void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,

private:

  // Per-run cached pointers, presumably initialized in runOnMachineFunction
  // (not visible in this view) -- TODO confirm.
  MachineFunction *MF = nullptr;
  const AArch64Subtarget *Subtarget = nullptr;
  const AArch64RegisterInfo *TRI = nullptr;
  const AArch64FunctionInfo *AFI = nullptr;
  const AArch64InstrInfo *TII = nullptr;
  const LibcallLoweringInfo *LLI = nullptr;

  MachineRegisterInfo *MRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
};
439
440static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
441 LiveRegs PhysLiveRegs = LiveRegs::None;
442 if (!LiveUnits.available(AArch64::NZCV))
443 PhysLiveRegs |= LiveRegs::NZCV;
444 // We have to track W0 and X0 separately as otherwise things can get
445 // confused if we attempt to preserve X0 but only W0 was defined.
446 if (!LiveUnits.available(AArch64::W0))
447 PhysLiveRegs |= LiveRegs::W0;
448 if (!LiveUnits.available(AArch64::W0_HI))
449 PhysLiveRegs |= LiveRegs::W0_HI;
450 return PhysLiveRegs;
451}
452
453static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
454 if (PhysLiveRegs & LiveRegs::NZCV)
455 LiveUnits.addReg(AArch64::NZCV);
456 if (PhysLiveRegs & LiveRegs::W0)
457 LiveUnits.addReg(AArch64::W0);
458 if (PhysLiveRegs & LiveRegs::W0_HI)
459 LiveUnits.addReg(AArch64::W0_HI);
460}
461
462[[maybe_unused]] bool isCallStartOpcode(unsigned Opc) {
463 switch (Opc) {
464 case AArch64::TLSDESC_CALLSEQ:
465 case AArch64::TLSDESC_AUTH_CALLSEQ:
466 case AArch64::ADJCALLSTACKDOWN:
467 return true;
468 default:
469 return false;
470 }
471}
472
// Walks every block bottom-up (for liveness), recording the ZA state each
// instruction requires, per-block entry/exit liveness of NZCV/W0, and the
// post-SME-prologue insertion point.
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
  assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
          SMEFnAttrs.hasZAState()) &&
         "Expected function to have ZA/ZT0 state!");

  // NOTE(review): the declaration of "Blocks" (per-block BlockInfo, indexed
  // by block number) appears to have been dropped by extraction.
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;

  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo &Block = Blocks[MBB.getNumber()];

    if (MBB.isEntryBlock()) {
      // Entry block:
      Block.FixedEntryState = ZAState::ENTRY;
    } else if (MBB.isEHPad()) {
      // EH entry block:
      Block.FixedEntryState = ZAState::LOCAL_COMMITTED;
    }

    // Track NZCV/W0 liveness backwards from the block's live-outs.
    LiveRegUnits LiveUnits(*TRI);
    LiveUnits.addLiveOuts(MBB);

    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
    auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
    auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
    for (MachineInstr &MI : reverse(MBB)) {
      // NOTE(review): the declaration of "MBBI" (MI's iterator) appears to
      // have been dropped by extraction.
      LiveUnits.stepBackward(MI);
      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
      // The SMEStateAllocPseudo marker is added to a function if the save
      // buffer was allocated in SelectionDAG. It marks the end of the
      // allocation -- which is a safe point for this pass to insert any TPIDR2
      // block setup.
      if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
        AfterSMEProloguePt = MBBI;
        PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
      }
      // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
      auto [NeededState, InsertPt] = getInstNeededZAState(*TRI, MI, SMEFnAttrs);
      assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) &&
             "Unexpected state change insertion point!");
      // TODO: Do something to avoid state changes where NZCV is live.
      if (MBBI == FirstTerminatorInsertPt)
        Block.PhysLiveRegsAtExit = PhysLiveRegs;
      if (MBBI == FirstNonPhiInsertPt)
        Block.PhysLiveRegsAtEntry = PhysLiveRegs;
      if (NeededState != ZAState::ANY)
        Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
    }

    // Reverse vector (as we had to iterate backwards for liveness).
    std::reverse(Block.Insts.begin(), Block.Insts.end());

    // Record the desired states on entry/exit of this block. These are the
    // states that would not incur a state transition.
    if (!Block.Insts.empty()) {
      Block.DesiredIncomingState = Block.Insts.front().NeededState;
      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
    }
  }

  return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
                      PhysLiveRegsAfterSMEPrologue};
}
538
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
                                           bool Forwards) {
  // If `Forwards`, this propagates desired states from predecessors to
  // successors, otherwise, this propagates states from successors to
  // predecessors.
  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
  };

  // Seed the worklist with every block whose (incoming if Forwards, else
  // outgoing) desired state is not yet a legal bundle state.
  // NOTE(review): the declaration of "Worklist" appears to have been dropped
  // by extraction.
  for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
      Worklist.push_back(MF->getBlockNumbered(BlockID));
  }

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];

    // Pick a legal edge bundle state that matches the majority of
    // predecessors/successors.
    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (MachineBasicBlock *PredOrSucc :
         Forwards ? predecessors(MBB) : successors(MBB)) {
      BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
      // (Local "ZAState" shadows the enum type's name here.)
      ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
      if (isLegalEdgeBundleZAState(ZAState))
        StateCounts[ZAState]++;
    }

    // The most common neighbouring state wins (index of the max count).
    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
    ZAState &CurrentState = GetBlockState(Block, Forwards);
    if (PropagatedState != CurrentState) {
      CurrentState = PropagatedState;
      ZAState &OtherState = GetBlockState(Block, !Forwards);
      // Propagate to the incoming/outgoing state if that is also "ANY".
      if (OtherState == ZAState::ANY)
        OtherState = PropagatedState;
      // Push any successors/predecessors that may need updating to the
      // worklist.
      for (MachineBasicBlock *SuccOrPred :
           Forwards ? successors(MBB) : predecessors(MBB)) {
        BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
        if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
          Worklist.push_back(SuccOrPred);
      }
    }
  }
}
588
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
// NOTE(review): the return-type line (a SmallVector of ZAState, per the
// in-class declaration above) appears to have been dropped by extraction.
MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
                                    const FunctionInfo &FnInfo) {
  SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
  for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');

    // Attempt to assign a ZA state for this bundle that minimizes state
    // transitions. Edges within loops are given a higher weight as we assume
    // they will be executed more than once.
    int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (unsigned BlockID : Bundles.getBlocks(I)) {
      LLVM_DEBUG(dbgs() << "- bb." << BlockID);

      const BlockInfo &Block = FnInfo.Blocks[BlockID];
      // A block can participate in this bundle via its incoming edge, its
      // outgoing edge, or both.
      bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
      bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;

      bool LegalInEdge =
          InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
      bool LegalOutEgde =
          OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
      if (LegalInEdge) {
        LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
                          << getZAStateString(Block.DesiredIncomingState));
        EdgeStateCounts[Block.DesiredIncomingState]++;
      }
      if (LegalOutEgde) {
        LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
                          << getZAStateString(Block.DesiredOutgoingState));
        EdgeStateCounts[Block.DesiredOutgoingState]++;
      }
      if (!LegalInEdge && !LegalOutEgde)
        LLVM_DEBUG(dbgs() << " (no state preference)");
      LLVM_DEBUG(dbgs() << '\n');
    }

    // Majority vote: the state most edges want (index of the max count).
    ZAState BundleState =
        ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);

    // If no block in the bundle expressed a preference, default to ACTIVE.
    if (BundleState == ZAState::ANY)
      BundleState = ZAState::ACTIVE;

    LLVM_DEBUG({
      dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
             << "Edge counts:";
      for (auto [State, Count] : enumerate(EdgeStateCounts))
        dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
      dbgs() << "\n\n";
    });

    BundleStates[I] = BundleState;
  }

  return BundleStates;
}
647
std::pair<MachineBasicBlock::iterator, LiveRegs>
MachineSMEABI::findStateChangeInsertionPoint(
    MachineBasicBlock &MBB, const BlockInfo &Block,
  // NOTE(review): the final parameter line (the InstInfo const_iterator
  // "Inst", per the in-class declaration) and the declaration of "InsertPt"
  // appear to have been dropped by extraction.
  LiveRegs PhysLiveRegs;
  // Default insertion point: immediately before the constrained instruction,
  // or before the block's terminators when placing an outgoing transition.
  if (Inst != Block.Insts.end()) {
    InsertPt = Inst->InsertPt;
    PhysLiveRegs = Inst->PhysLiveRegs;
  } else {
    InsertPt = MBB.getFirstTerminator();
    PhysLiveRegs = Block.PhysLiveRegsAtExit;
  }

  if (PhysLiveRegs == LiveRegs::None)
    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live regs).

  // Find the previous state change. We can not move before this point.
  MachineBasicBlock::iterator PrevStateChangeI;
  if (Inst == Block.Insts.begin()) {
    PrevStateChangeI = MBB.begin();
  } else {
    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
    // InstInfo object for instructions that require a specific ZA state, so the
    // InstInfo is the site of the previous state change in the block (which can
    // be several MIs earlier).
    PrevStateChangeI = std::prev(Inst)->InsertPt;
  }

  // Note: LiveUnits will only accurately track X0 and NZCV.
  LiveRegUnits LiveUnits(*TRI);
  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
  auto BestCandidate = std::make_pair(InsertPt, PhysLiveRegs);
  // Scan backwards from the default point looking for a spot with fewer live
  // registers to preserve.
  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
    // Don't move before/into a call (which may have a state change before it).
    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
      break;
    LiveUnits.stepBackward(*I);
    LiveRegs CurrentPhysLiveRegs = getPhysLiveRegs(LiveUnits);
    // Find places where NZCV is available, but keep looking for locations where
    // both NZCV and X0 are available, which can avoid some copies.
    if (!(CurrentPhysLiveRegs & LiveRegs::NZCV))
      BestCandidate = {I, CurrentPhysLiveRegs};
    if (CurrentPhysLiveRegs == LiveRegs::None)
      break;
  }
  return BestCandidate;
}
696
697void MachineSMEABI::insertStateChanges(EmitContext &Context,
698 const FunctionInfo &FnInfo,
699 const EdgeBundles &Bundles,
700 ArrayRef<ZAState> BundleStates) {
701 for (MachineBasicBlock &MBB : *MF) {
702 const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
703 ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
704 /*Out=*/false)];
705
706 ZAState CurrentState = Block.FixedEntryState;
707 if (CurrentState == ZAState::ANY)
708 CurrentState = InState;
709
710 for (auto &Inst : Block.Insts) {
711 if (CurrentState != Inst.NeededState) {
712 auto [InsertPt, PhysLiveRegs] =
713 findStateChangeInsertionPoint(MBB, Block, &Inst);
714 emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
715 PhysLiveRegs);
716 CurrentState = Inst.NeededState;
717 }
718 }
719
720 if (MBB.succ_empty())
721 continue;
722
723 ZAState OutState =
724 BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
725 if (CurrentState != OutState) {
726 auto [InsertPt, PhysLiveRegs] =
727 findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
728 emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
729 PhysLiveRegs);
730 }
731 }
732}
733
// NOTE(review): the signature of this helper (lines preceding this body) was
// dropped by extraction. From the body it takes an MBB and an iterator MBBI
// and returns a DebugLoc -- MBBI's location if valid, otherwise the last
// instruction's, or an empty DebugLoc for an empty block. Confirm upstream.
  if (MBB.empty())
    return DebugLoc();
  return MBBI != MBB.end() ? MBBI->getDebugLoc() : MBB.back().getDebugLoc();
}
740
/// Finds the first call (as determined by MachineInstr::isCall()) starting from
/// \p MBBI in \p MBB marked with \p Marker (which is a marker opcode such as
/// RequiresZASavePseudo). If a marked call is found, it is pushed to \p Calls
/// and the function returns true.
static bool findMarkedCall(const MachineBasicBlock &MBB,
  // NOTE(review): parameter lines (the start iterator "MBBI" and the output
  // vector "Calls") and the declaration of "I" (starting at MarkerInst)
  // appear to have been dropped by extraction.
                           unsigned Marker, unsigned CallDestroyOpcode) {
  // Scan forward for the marker pseudo-instruction.
  auto IsMarker = [&](auto &MI) { return MI.getOpcode() == Marker; };
  auto MarkerInst = std::find_if(MBBI, MBB.end(), IsMarker);
  if (MarkerInst == MBB.end())
    return false;
  // Advance from the marker until the call it precedes (or the end of the
  // call sequence).
  while (++I != MBB.end()) {
    if (I->isCall() || I->getOpcode() == CallDestroyOpcode)
      break;
  }
  if (I != MBB.end() && I->isCall())
    Calls.push_back(&*I);
  // Note: This function always returns true if a "Marker" was found.
  return true;
}
763
void MachineSMEABI::collectReachableMarkedCalls(
    const MachineBasicBlock &StartMBB,
    // NOTE(review): a parameter line (the start iterator "StartInst") appears
    // to have been dropped by extraction.
    SmallVectorImpl<const MachineInstr *> &Calls, unsigned Marker) const {
  assert(Marker == AArch64::InOutZAUsePseudo ||
         Marker == AArch64::RequiresZASavePseudo ||
         Marker == AArch64::RequiresZT0SavePseudo);
  unsigned CallDestroyOpcode = TII->getCallFrameDestroyOpcode();
  // If the starting block already contains a marker, stop here (we only
  // report the first marked call along any path).
  if (findMarkedCall(StartMBB, StartInst, Calls, Marker, CallDestroyOpcode))
    return;

  // Traverse successors, pruning any path once a marker is found.
  // NOTE(review): the declarations of the visited set and "Worklist" (seeded
  // from StartMBB's successors below) appear to have been dropped by
  // extraction.
                                                     StartMBB.succ_rend());
  while (!Worklist.empty()) {
    const MachineBasicBlock *MBB = Worklist.pop_back_val();
    auto [_, Inserted] = Visited.insert(MBB);
    if (!Inserted)
      continue;

    if (!findMarkedCall(*MBB, MBB->begin(), Calls, Marker, CallDestroyOpcode))
      Worklist.append(MBB->succ_rbegin(), MBB->succ_rend());
  }
}
788
789static StringRef getCalleeName(const MachineInstr &CallInst) {
790 assert(CallInst.isCall() && "expected a call");
791 for (const MachineOperand &MO : CallInst.operands()) {
792 if (MO.isSymbol())
793 return MO.getSymbolName();
794 if (MO.isGlobal())
795 return MO.getGlobal()->getName();
796 }
797 return {};
798}
799
void MachineSMEABI::emitCallSaveRemarks(const MachineBasicBlock &MBB,
    // NOTE(review): a parameter line (likely the iterator "MBBI" used below)
    // appears to have been dropped by extraction.
                                        DebugLoc DL, unsigned Marker,
                                        StringRef RemarkName,
                                        StringRef SaveName) const {
  // Helper to construct a remark anchored at a given location/block.
  auto SaveRemark = [&](DebugLoc DL, const MachineBasicBlock &MBB) {
    return MachineOptimizationRemarkAnalysis("sme", RemarkName, DL, &MBB);
  };
  StringRef StateName = Marker == AArch64::RequiresZT0SavePseudo ? "ZT0" : "ZA";
  // Always emit the top-level "save emitted" remark.
  ORE->emit([&] {
    return SaveRemark(DL, MBB) << SaveName << " of " << StateName
                               << " emitted in '" << MF->getName() << "'";
  });
  // Listing the individual calls that forced the save is extra analysis.
  if (!ORE->allowExtraAnalysis("sme"))
    return;
  SmallVector<const MachineInstr *> CallsRequiringSaves;
  collectReachableMarkedCalls(MBB, MBBI, CallsRequiringSaves, Marker);
  for (const MachineInstr *CallInst : CallsRequiringSaves) {
    auto R = SaveRemark(CallInst->getDebugLoc(), *CallInst->getParent());
    R << "call";
    if (StringRef CalleeName = getCalleeName(*CallInst); !CalleeName.empty())
      R << " to '" << CalleeName << "'";
    R << " requires " << StateName << " save";
    ORE->emit(R);
  }
}
826
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
    // NOTE(review): parameter lines (MBB, the insertion iterator "MBBI") and
    // the DebugLoc ("DL") setup appear to have been dropped by extraction.

  emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo,
                      "SMELazySaveZA", "lazy save");

  // Get pointer to TPIDR2 block.
  Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
  Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // Copy to a plain GPR64 so it can be written to the system register.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
      .addReg(TPIDR2);
  // Set TPIDR2_EL0 to point to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(TPIDR2Ptr);
}
849
/// Save live physical registers (NZCV and W0/X0) to virtual registers so the
/// emitted state-change code may clobber them.
PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
    // NOTE(review): parameter lines (MBB and the insertion iterator "MBBI")
    // appear to have been dropped by extraction.
                                             DebugLoc DL) {
  PhysRegSave RegSave{PhysLiveRegs};
  // NZCV is saved by reading it into a GPR with MRS.
  if (PhysLiveRegs & LiveRegs::NZCV) {
    RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit);
  }
  // Note: Preserving X0 is "free" as this is before register allocation, so
  // the register allocator is still able to optimize these copies.
  if (PhysLiveRegs & LiveRegs::W0) {
    // Copy the full X0 only if its high half is live, otherwise just W0.
    RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
                                                    ? &AArch64::GPR64RegClass
                                                    : &AArch64::GPR32RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
        .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
  }
  return RegSave;
}
872
/// Restore physical registers from the virtual-register copies made by
/// createPhysRegSave.
void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
    // NOTE(review): parameter lines (MBB and the insertion iterator "MBBI")
    // appear to have been dropped by extraction.
                                      DebugLoc DL) {
  // Write the saved flags back to NZCV via MSR.
  if (RegSave.StatusFlags != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(RegSave.StatusFlags)
        .addReg(AArch64::NZCV, RegState::ImplicitDefine);

  // Copy the saved value back into X0 (or just W0 if only the low half was
  // live).
  if (RegSave.X0Save != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
            RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
        .addReg(RegSave.X0Save);
}
888
/// Append the callee (external symbol) and the call-preserved register mask
/// for the SME ABI routine implementing \p LC to the call-like instruction
/// \p MIB. Emits a fatal error if the routine is unavailable in this
/// lowering, or if its calling convention does not match \p ExpectedCC.
/// NOTE(review): CC and ImplName are initialized from LCImpl on source lines
/// elided from this view.
889void MachineSMEABI::addSMELibCall(MachineInstrBuilder &MIB, RTLIB::Libcall LC,
890 CallingConv::ID ExpectedCC) {
891 RTLIB::LibcallImpl LCImpl = LLI->getLibcallImpl(LC);
892 if (LCImpl == RTLIB::Unsupported)
893 emitError("cannot lower SME ABI (SME routines unsupported)");
896 if (CC != ExpectedCC)
897 emitError("invalid calling convention for SME routine: '" + ImplName + "'");
898 // FIXME: This assumes the ImplName StringRef is null-terminated.
899 MIB.addExternalSymbol(ImplName.data());
900 MIB.addRegMask(TRI->getCallPreservedMask(*MF, CC));
901}
902
/// Emit the lazy-save restore sequence (after a call): re-enable PSTATE.ZA,
/// read the current TPIDR2_EL0 value, materialize this function's TPIDR2
/// block address into X0, (conditionally) restore ZA via RestoreZAPseudo —
/// which calls the __arm_tpidr2_restore SME routine — and finally zero
/// TPIDR2_EL0. Live NZCV/X0 are preserved around the sequence via
/// createPhysRegSave()/restorePhyRegSave().
903void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
906 LiveRegs PhysLiveRegs) {
908 Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
909 Register TPIDR2 = AArch64::X0;
910
911 // TODO: Emit these within the restore MBB to prevent unnecessary saves.
912 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
913
914 // Enable ZA.
915 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
916 .addImm(AArch64SVCR::SVCRZA)
917 .addImm(1);
918 // Get current TPIDR2_EL0.
919 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
920 .addImm(AArch64SysReg::TPIDR2_EL0);
921 // Get pointer to TPIDR2 block.
922 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
923 .addFrameIndex(Context.getTPIDR2Block(*MF))
924 .addImm(0)
925 .addImm(0);
926 // (Conditionally) restore ZA state.
927 auto RestoreZA = BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
928 .addReg(TPIDR2EL0)
929 .addReg(TPIDR2);
930 addSMELibCall(
931 RestoreZA, RTLIB::SMEABI_TPIDR2_RESTORE,
933 // Zero TPIDR2_EL0.
934 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
935 .addImm(AArch64SysReg::TPIDR2_EL0)
936 .addReg(AArch64::XZR);
937
938 restorePhyRegSave(RegSave, MBB, MBBI, DL);
939}
940
/// Set PSTATE.ZA to \p On (1 = enabled, 0 = disabled), optionally zeroing
/// TPIDR2_EL0 first. Callers pass \p ClearTPIDR2 when transitioning out of
/// LOCAL_SAVED, where TPIDR2_EL0 still points at our lazy-save block (see
/// emitStateChange).
941void MachineSMEABI::emitZAMode(MachineBasicBlock &MBB,
943 bool ClearTPIDR2, bool On) {
945
946 if (ClearTPIDR2)
947 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
948 .addImm(AArch64SysReg::TPIDR2_EL0)
949 .addReg(AArch64::XZR);
950
951 // Set PSTATE.ZA to the requested state (1 = enabled, 0 = disabled).
952 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
953 .addImm(AArch64SVCR::SVCRZA)
954 .addImm(On ? 1 : 0);
955}
956
/// Allocate the lazy-save buffer and initialize the TPIDR2 block.
/// Unless SelectionDAG already allocated the buffer (Windows path), the
/// buffer is carved off the stack: MSUB computes Buffer = SP - SVL * SVL,
/// which becomes the new SP, and PEI is told about the variable-sized
/// object. The TPIDR2 block is then initialized with a single STP storing
/// the buffer pointer and SVL (the number of save slices); bytes 10-15 of
/// the block are implicitly zeroed by that store.
957void MachineSMEABI::emitAllocateLazySaveBuffer(
958 EmitContext &Context, MachineBasicBlock &MBB,
960 MachineFrameInfo &MFI = MF->getFrameInfo();
962 Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
963 Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
964 Register Buffer = AFI->getEarlyAllocSMESaveBuffer();
965
966 // Calculate SVL (streaming vector length in bytes, via RDSVL #1).
967 BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);
968
969 // 1. Allocate the lazy save buffer.
970 if (Buffer == AArch64::NoRegister) {
971 // TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
972 // Buffer != AArch64::NoRegister). This is done to reuse the existing
973 // expansions (which can insert stack checks). This works, but it means we
974 // will always allocate the lazy save buffer (even if the function contains
975 // no lazy saves). If we want to handle Windows here, we'll need to
976 // implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
977 assert(!Subtarget->isTargetWindows() &&
978 "Lazy ZA save is not yet supported on Windows");
979 Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
980 // Get original stack pointer.
981 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
982 .addReg(AArch64::SP);
983 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
984 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
985 .addReg(SVL)
986 .addReg(SVL)
987 .addReg(SP);
988 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
989 .addReg(Buffer);
990 // We have just allocated a variable sized object, tell this to PEI.
991 MFI.CreateVariableSizedObject(Align(16), nullptr);
992 }
993
994 // 2. Setup the TPIDR2 block.
995 {
996 // Note: This case just needs to do `SVL << 48`. It is not implemented as we
997 // generally don't support big-endian SVE/SME.
998 if (!Subtarget->isLittleEndian())
1000 "TPIDR2 block initialization is not supported on big-endian targets");
1001
1002 // Store buffer pointer and num_za_save_slices.
1003 // Bytes 10-15 are implicitly zeroed.
1004 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
1005 .addReg(Buffer)
1006 .addReg(SVL)
1007 .addFrameIndex(Context.getTPIDR2Block(*MF))
1008 .addImm(0);
1009 }
1010}
1011
// Tile mask with all eight bits set — presumably the ZERO instruction
// immediate selecting every ZA tile; the use site is on a line not visible
// in this chunk, TODO confirm.
1012static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
1013
/// Emit the ZA setup at function entry.
/// - Private-ZA interface: read TPIDR2_EL0 and commit any in-progress caller
///   lazy save (CommitZASavePseudo, which calls __arm_tpidr2_save), zeroing
///   ZA and/or ZT0 when the function has a "new" ZA/ZT0 interface, then
///   enable PSTATE.ZA (it may have been OFF on entry).
/// - Shared-ZA interface: only zero ZA and/or ZT0 as the "new" attributes
///   require; ZA is already active.
1014void MachineSMEABI::emitSMEPrologue(MachineBasicBlock &MBB,
1017
1018 bool ZeroZA = AFI->getSMEFnAttrs().isNewZA();
1019 bool ZeroZT0 = AFI->getSMEFnAttrs().isNewZT0();
1020 if (AFI->getSMEFnAttrs().hasPrivateZAInterface()) {
1021 // Get current TPIDR2_EL0.
1022 Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
1023 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS))
1024 .addReg(TPIDR2EL0, RegState::Define)
1025 .addImm(AArch64SysReg::TPIDR2_EL0);
1026 // If TPIDR2_EL0 is non-zero, commit the lazy save.
1027 // NOTE: Functions that only use ZT0 don't need to zero ZA.
1028 auto CommitZASave =
1029 BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo))
1030 .addReg(TPIDR2EL0)
1031 .addImm(ZeroZA)
1032 .addImm(ZeroZT0);
1033 addSMELibCall(
1034 CommitZASave, RTLIB::SMEABI_TPIDR2_SAVE,
1036 if (ZeroZA)
1037 CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
1038 if (ZeroZT0)
1039 CommitZASave.addDef(AArch64::ZT0, RegState::ImplicitDefine);
1040 // Enable ZA (as ZA could have previously been in the OFF state).
1041 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
1042 .addImm(AArch64SVCR::SVCRZA)
1043 .addImm(1);
1044 } else if (AFI->getSMEFnAttrs().hasSharedZAInterface()) {
1045 if (ZeroZA)
1046 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ZERO_M))
1048 .addDef(AArch64::ZAB0, RegState::ImplicitDefine);
1049 if (ZeroZT0)
1050 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ZERO_T)).addDef(AArch64::ZT0);
1051 }
1052}
1053
/// Emit a full ("agnostic ZA") save or restore: copy the agnostic-ZA buffer
/// pointer into X0, then call __arm_sme_save (\p IsSave) or
/// __arm_sme_restore. Live NZCV/X0 are preserved around the call, and saves
/// are reported under the "SMEFullZASave" remark.
1054void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
1057 LiveRegs PhysLiveRegs, bool IsSave) {
1059
1060 if (IsSave)
1061 emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo,
1062 "SMEFullZASave", "full save");
1063
1064 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
1065
1066 // Copy the buffer pointer into X0.
1067 Register BufferPtr = AArch64::X0;
1068 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
1069 .addReg(Context.getAgnosticZABufferPtr(*MF));
1070
1071 // Call __arm_sme_save/__arm_sme_restore.
1072 auto SaveRestoreZA = BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1073 .addReg(BufferPtr, RegState::Implicit);
1074 addSMELibCall(
1075 SaveRestoreZA,
1076 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE,
1078
1079 restorePhyRegSave(RegSave, MBB, MBBI, DL);
1080}
1081
/// Spill (\p IsSave) or reload ZT0 to/from its dedicated stack slot:
/// materialize the slot address with ADDXri from the frame index, then emit
/// STR_TX (save) or LDR_TX (restore).
1082void MachineSMEABI::emitZT0SaveRestore(EmitContext &Context,
1085 bool IsSave) {
1087
1088 // Note: This will report calls that _only_ need ZT0 saved. Calls that save
1089 // both ZA and ZT0 will be reported under the SMELazySaveZA remark. This
1090 // prevents reporting the same calls twice.
1091 if (IsSave)
1092 emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZT0SavePseudo,
1093 "SMEZT0Save", "spill");
1094
1095 Register ZT0Save = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
1096
1097 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), ZT0Save)
1098 .addFrameIndex(Context.getZT0SaveSlot(*MF))
1099 .addImm(0)
1100 .addImm(0);
1101
1102 if (IsSave) {
1103 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STR_TX))
1104 .addReg(AArch64::ZT0)
1105 .addReg(ZT0Save);
1106 } else {
1107 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDR_TX), AArch64::ZT0)
1108 .addReg(ZT0Save);
1109 }
1110}
1111
/// Allocate the buffer for full (agnostic) ZA saves on the stack, sized at
/// runtime by calling __arm_sme_state_size (result in X0), unless
/// SelectionDAG already allocated one. Live NZCV/X0 are preserved around the
/// size call, and PEI is told about the variable-sized object.
/// NOTE(review): the insertion-point/live-regs parameters and the SUBXrx64
/// extend immediate are on source lines elided from this view.
1112void MachineSMEABI::emitAllocateFullZASaveBuffer(
1113 EmitContext &Context, MachineBasicBlock &MBB,
1115 // Buffer already allocated in SelectionDAG.
1116 if (AFI->getEarlyAllocSMESaveBuffer())
1117 return;
1118
1120 Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
1121 Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
1122
1123 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
1124
1125 // Calculate the SME state size.
1126 {
1127 auto SMEStateSize = BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1128 .addReg(AArch64::X0, RegState::ImplicitDefine);
1129 addSMELibCall(
1130 SMEStateSize, RTLIB::SMEABI_SME_STATE_SIZE,
1132 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
1133 .addReg(AArch64::X0);
1134 }
1135
1136 // Allocate a buffer object of the size given __arm_sme_state_size.
1137 {
1138 MachineFrameInfo &MFI = MF->getFrameInfo();
1139 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1140 .addReg(AArch64::SP)
1141 .addReg(BufferSize)
1143 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
1144 .addReg(AArch64::SP);
1145
1146 // We have just allocated a variable sized object, tell this to PEI.
1147 MFI.CreateVariableSizedObject(Align(16), nullptr);
1148 }
1149
1150 restorePhyRegSave(RegSave, MBB, MBBI, DL);
1151}
1152
1153struct FromState {
1154 ZAState From;
1155
1156 constexpr uint8_t to(ZAState To) const {
1157 static_assert(NUM_ZA_STATE < 16, "expected ZAState to fit in 4-bits");
1158 return uint8_t(From) << 4 | uint8_t(To);
1159 }
1160};
1161
1162constexpr FromState transitionFrom(ZAState From) { return FromState{From}; }
1163
/// Emit the instructions that move ZA from state \p From to state \p To at
/// the given insertion point, dispatching on the packed (From, To) key built
/// by transitionFrom().to(). Unhandled pairs are unreachable.
1164void MachineSMEABI::emitStateChange(EmitContext &Context,
1167 ZAState From, ZAState To,
1168 LiveRegs PhysLiveRegs) {
1169 // ZA not used.
1170 if (From == ZAState::ANY || To == ZAState::ANY)
1171 return;
1172
1173 // If we're exiting from the ENTRY state that means that the function has not
1174 // used ZA, so in the case of private ZA/ZT0 functions we can omit any set up.
1175 if (From == ZAState::ENTRY && To == ZAState::OFF)
1176 return;
1177
1178 // TODO: Avoid setting up the save buffer if there's no transition to
1179 // LOCAL_SAVED.
1180 if (From == ZAState::ENTRY) {
1181 assert(&MBB == &MBB.getParent()->front() &&
1182 "ENTRY state only valid in entry block");
1183 emitSMEPrologue(MBB, MBB.getFirstNonPHI());
1184 if (To == ZAState::ACTIVE)
1185 return; // Nothing more to do (ZA is active after the prologue).
1186
1187 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
1188 // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
1189 // case by changing the placement of the zero instruction.
1190 From = ZAState::ACTIVE;
1191 }
1192
1193 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
1194 bool IsAgnosticZA = SMEFnAttrs.hasAgnosticZAInterface();
1195 bool HasZT0State = SMEFnAttrs.hasZT0State();
1196 bool HasZAState = IsAgnosticZA || SMEFnAttrs.hasZAState();
1197
1198 switch (transitionFrom(From).to(To)) {
1199 // This section handles: ACTIVE <-> ACTIVE_ZT0_SAVED
1200 case transitionFrom(ZAState::ACTIVE).to(ZAState::ACTIVE_ZT0_SAVED):
1201 emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
1202 break;
1203 case transitionFrom(ZAState::ACTIVE_ZT0_SAVED).to(ZAState::ACTIVE):
1204 emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false);
1205 break;
1206
1207 // This section handles: ACTIVE[_ZT0_SAVED] -> LOCAL_SAVED
1208 case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_SAVED):
1209 case transitionFrom(ZAState::ACTIVE_ZT0_SAVED).to(ZAState::LOCAL_SAVED):
1210 if (HasZT0State && From == ZAState::ACTIVE)
1211 emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
1212 if (HasZAState)
1213 emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
1214 break;
1215
1216 // This section handles: ACTIVE -> LOCAL_COMMITTED
1217 case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_COMMITTED):
1218 // TODO: We could support ZA state here, but this transition is currently
1219 // only possible when we _don't_ have ZA state.
1220 assert(HasZT0State && !HasZAState && "Expect to only have ZT0 state.");
1221 emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
1222 emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/false);
1223 break;
1224
1225 // This section handles: LOCAL_COMMITTED -> (OFF|LOCAL_SAVED)
1226 case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::OFF):
1227 case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::LOCAL_SAVED):
1228 // These transitions are no-ops.
1229 break;
1230
1231 // This section handles: LOCAL_(SAVED|COMMITTED) -> ACTIVE[_ZT0_SAVED]
1232 case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE):
1233 case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE_ZT0_SAVED):
1234 case transitionFrom(ZAState::LOCAL_SAVED).to(ZAState::ACTIVE):
1235 if (HasZAState)
1236 emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
1237 else
1238 emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/true);
1239 if (HasZT0State && To == ZAState::ACTIVE)
1240 emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false);
1241 break;
1242
1243 // This section handles transitions to OFF (not previously covered)
1244 case transitionFrom(ZAState::ACTIVE).to(ZAState::OFF):
1245 case transitionFrom(ZAState::ACTIVE_ZT0_SAVED).to(ZAState::OFF):
1246 case transitionFrom(ZAState::LOCAL_SAVED).to(ZAState::OFF):
1247 assert(SMEFnAttrs.hasPrivateZAInterface() &&
1248 "Did not expect to turn ZA off in shared/agnostic ZA function");
1249 emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED,
1250 /*On=*/false);
1251 break;
1252
1253 default:
1254 dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
1255 << getZAStateString(To) << '\n';
1256 llvm_unreachable("Unimplemented state transition");
1257 }
1258}
1259
1260} // end anonymous namespace
1261
// Legacy pass-manager registration (command-line name
// "aarch64-machine-sme-abi"; not CFG-only, not an analysis).
1262INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
1263 false, false)
1264
1265bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
1266 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
1267 if (!Subtarget->hasSME())
1268 return false;
1269
1270 AFI = MF.getInfo<AArch64FunctionInfo>();
1271 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
1272 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
1273 !SMEFnAttrs.hasAgnosticZAInterface())
1274 return false;
1275
1276 assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
1277
1278 this->MF = &MF;
1279 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
1280 LLI = &getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1281 *MF.getFunction().getParent(), *Subtarget);
1282 TII = Subtarget->getInstrInfo();
1283 TRI = Subtarget->getRegisterInfo();
1284 MRI = &MF.getRegInfo();
1285
1286 const EdgeBundles &Bundles =
1287 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
1288
1289 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
1290
1291 if (OptLevel != CodeGenOptLevel::None) {
1292 // Propagate desired states forward, then backwards. Most of the propagation
1293 // should be done in the forward step, and backwards propagation is then
1294 // used to fill in the gaps. Note: Doing both in one step can give poor
1295 // results. For example, consider this subgraph:
1296 //
1297 // ┌─────┐
1298 // ┌─┤ BB0 ◄───┐
1299 // │ └─┬───┘ │
1300 // │ ┌─▼───◄──┐│
1301 // │ │ BB1 │ ││
1302 // │ └─┬┬──┘ ││
1303 // │ │└─────┘│
1304 // │ ┌─▼───┐ │
1305 // │ │ BB2 ├───┘
1306 // │ └─┬───┘
1307 // │ ┌─▼───┐
1308 // └─► BB3 │
1309 // └─────┘
1310 //
1311 // If:
1312 // - "BB0" and "BB2" (outer loop) has no state preference
1313 // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
1314 // - "BB3" desires the LOCAL_SAVED state on entry
1315 //
1316 // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
1317 // then from BB2 to BB0. Which results in the inner and outer loops having
1318 // the "ACTIVE" state. This avoids any state changes in the loops.
1319 //
1320 // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
1321 // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
1322 // in the outer loop.
1323 for (bool Forwards : {true, false})
1324 propagateDesiredStates(FnInfo, Forwards);
1325 }
1326
1327 SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
1328
1329 EmitContext Context;
1330 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
1331
1332 if (Context.needsSaveBuffer()) {
1333 if (FnInfo.AfterSMEProloguePt) {
1334 // Note: With inline stack probes the AfterSMEProloguePt may not be in the
1335 // entry block (due to the probing loop).
1336 MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
1337 emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
1338 FnInfo.PhysLiveRegsAfterSMEPrologue);
1339 } else {
1340 MachineBasicBlock &EntryBlock = MF.front();
1341 emitAllocateZASaveBuffer(
1342 Context, EntryBlock, EntryBlock.getFirstNonPHI(),
1343 FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
1344 }
1345 }
1346
1347 return true;
1348}
1349
1351 return new MachineSMEABI(OptLevel);
1352}
unsigned const MachineRegisterInfo * MRI
static constexpr unsigned ZERO_ALL_ZA_MASK
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define ENTRY(ASMNAME, ENUM)
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define MAKE_CASE(V)
Register const TargetRegisterInfo * TRI
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class represents a function call, abstracting a target machine's calling convention.
A debug info location.
Definition DebugLoc.h:123
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition EdgeBundles.h:53
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
MachineInstrBundleIterator< const MachineInstr > const_iterator
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
succ_reverse_iterator succ_rbegin()
MachineInstrBundleIterator< MachineInstr > iterator
succ_reverse_iterator succ_rend()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
Diagnostic information for optimization analysis remarks.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
bool hasSharedZAInterface() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
op_range operands()
Definition User.h:267
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
auto successors(const MachineBasicBlock *BB)
FunctionPass * createMachineSMEABIPass(CodeGenOptLevel)
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
@ Default
-O2, -Os, -Oz
Definition CodeGen.h:85
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2078
auto predecessors(const MachineBasicBlock *BB)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.