71#define DEBUG_TYPE "aarch64-machine-sme-abi"
109 Register StatusFlags = AArch64::NoRegister;
110 Register X0Save = AArch64::NoRegister;
116 ZAState NeededState{ZAState::ANY};
118 LiveRegs PhysLiveRegs = LiveRegs::None;
124 ZAState FixedEntryState{ZAState::ANY};
126 LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
127 LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
133 std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
134 LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
141 EmitContext() =
default;
146 return *TPIDR2BlockFI;
149 return *TPIDR2BlockFI;
154 if (AgnosticZABufferPtr != AArch64::NoRegister)
155 return AgnosticZABufferPtr;
158 AgnosticZABufferPtr =
159 BufferPtr != AArch64::NoRegister
162 return AgnosticZABufferPtr;
167 bool needsSaveBuffer()
const {
168 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
169 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
170 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
174 std::optional<int> TPIDR2BlockFI;
175 Register AgnosticZABufferPtr = AArch64::NoRegister;
178static bool isLegalEdgeBundleZAState(ZAState State) {
180 case ZAState::ACTIVE:
181 case ZAState::LOCAL_SAVED:
188StringRef getZAStateString(ZAState State) {
189#define MAKE_CASE(V) \
209 return AArch64::MPR128RegClass.contains(SR) ||
210 AArch64::ZTRRegClass.contains(SR);
216static std::pair<ZAState, MachineBasicBlock::iterator>
218 bool ZAOffAtReturn) {
221 if (
MI.getOpcode() == AArch64::InOutZAUsePseudo)
222 return {ZAState::ACTIVE, std::prev(InsertPt)};
224 if (
MI.getOpcode() == AArch64::RequiresZASavePseudo)
225 return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
228 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
230 for (
auto &MO :
MI.operands()) {
231 if (isZAorZTRegOp(
TRI, MO))
232 return {ZAState::ACTIVE, InsertPt};
235 return {ZAState::ANY, InsertPt};
239 inline static char ID = 0;
245 StringRef getPassName()
const override {
return "Machine SME ABI pass"; }
257 FunctionInfo collectNeededZAStates(
SMEAttrs SMEFnAttrs);
262 const FunctionInfo &FnInfo);
266 void insertStateChanges(EmitContext &,
const FunctionInfo &FnInfo,
292 LiveRegs PhysLiveRegs,
bool IsSave);
304 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
305 return emitFullZASaveRestore(Context,
MBB,
MBBI, PhysLiveRegs,
307 return emitSetupLazySave(Context,
MBB,
MBBI);
311 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
312 return emitFullZASaveRestore(Context,
MBB,
MBBI, PhysLiveRegs,
314 return emitRestoreLazySave(Context,
MBB,
MBBI, PhysLiveRegs);
319 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
320 return emitAllocateFullZASaveBuffer(Context,
MBB,
MBBI, PhysLiveRegs);
321 return emitAllocateLazySaveBuffer(Context,
MBB,
MBBI);
340FunctionInfo MachineSMEABI::collectNeededZAStates(
SMEAttrs SMEFnAttrs) {
343 "Expected function to have ZA/ZT0 state!");
346 LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
347 std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
350 BlockInfo &
Block = Blocks[
MBB.getNumber()];
352 if (
MBB.isEntryBlock()) {
355 ? ZAState::CALLER_DORMANT
357 }
else if (
MBB.isEHPad()) {
359 Block.FixedEntryState = ZAState::LOCAL_SAVED;
365 auto GetPhysLiveRegs = [&] {
366 LiveRegs PhysLiveRegs = LiveRegs::None;
368 PhysLiveRegs |= LiveRegs::NZCV;
372 PhysLiveRegs |= LiveRegs::W0;
373 if (!LiveUnits.
available(AArch64::W0_HI))
374 PhysLiveRegs |= LiveRegs::W0_HI;
378 Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
379 auto FirstTerminatorInsertPt =
MBB.getFirstTerminator();
380 auto FirstNonPhiInsertPt =
MBB.getFirstNonPHI();
384 LiveRegs PhysLiveRegs = GetPhysLiveRegs();
389 if (
MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
390 AfterSMEProloguePt =
MBBI;
391 PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
394 auto [NeededState, InsertPt] = getZAStateBeforeInst(
397 InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
398 "Unexpected state change insertion point!");
400 if (
MBBI == FirstTerminatorInsertPt)
401 Block.PhysLiveRegsAtExit = PhysLiveRegs;
402 if (
MBBI == FirstNonPhiInsertPt)
403 Block.PhysLiveRegsAtEntry = PhysLiveRegs;
404 if (NeededState != ZAState::ANY)
405 Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
409 std::reverse(
Block.Insts.begin(),
Block.Insts.end());
412 return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
413 PhysLiveRegsAfterSMEPrologue};
419MachineSMEABI::assignBundleZAStates(
const EdgeBundles &Bundles,
420 const FunctionInfo &FnInfo) {
423 LLVM_DEBUG(
dbgs() <<
"Assigning ZA state for edge bundle: " <<
I <<
'\n');
430 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
431 for (
unsigned BlockID : Bundles.
getBlocks(
I)) {
434 const BlockInfo &
Block = FnInfo.Blocks[BlockID];
435 if (
Block.Insts.empty()) {
439 bool InEdge = Bundles.
getBundle(BlockID,
false) ==
I;
440 bool OutEdge = Bundles.
getBundle(BlockID,
true) ==
I;
442 ZAState DesiredIncomingState =
Block.Insts.front().NeededState;
443 if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
444 EdgeStateCounts[DesiredIncomingState]++;
446 << getZAStateString(DesiredIncomingState));
448 ZAState DesiredOutgoingState =
Block.Insts.back().NeededState;
449 if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
450 EdgeStateCounts[DesiredOutgoingState]++;
452 << getZAStateString(DesiredOutgoingState));
457 ZAState BundleState =
458 ZAState(
max_element(EdgeStateCounts) - EdgeStateCounts);
462 if (BundleState == ZAState::ANY)
463 BundleState = ZAState::ACTIVE;
466 dbgs() <<
"Chosen ZA state: " << getZAStateString(BundleState) <<
'\n'
469 dbgs() <<
" " << getZAStateString(ZAState(State)) <<
": " <<
Count;
473 BundleStates[
I] = BundleState;
479void MachineSMEABI::insertStateChanges(EmitContext &Context,
480 const FunctionInfo &FnInfo,
484 const BlockInfo &
Block = FnInfo.Blocks[
MBB.getNumber()];
485 ZAState InState = BundleStates[Bundles.
getBundle(
MBB.getNumber(),
488 ZAState CurrentState =
Block.FixedEntryState;
489 if (CurrentState == ZAState::ANY)
490 CurrentState = InState;
492 for (
auto &Inst :
Block.Insts) {
493 if (CurrentState != Inst.NeededState)
494 emitStateChange(Context,
MBB, Inst.InsertPt, CurrentState,
495 Inst.NeededState, Inst.PhysLiveRegs);
496 CurrentState = Inst.NeededState;
499 if (
MBB.succ_empty())
504 if (CurrentState != OutState)
505 emitStateChange(Context,
MBB,
MBB.getFirstTerminator(), CurrentState,
506 OutState,
Block.PhysLiveRegsAtExit);
513 return MBBI->getDebugLoc();
517void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
523 Register TPIDR2 =
MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
524 Register TPIDR2Ptr =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
533 .
addImm(AArch64SysReg::TPIDR2_EL0)
537PhysRegSave MachineSMEABI::createPhysRegSave(
LiveRegs PhysLiveRegs,
541 PhysRegSave RegSave{PhysLiveRegs};
542 if (PhysLiveRegs & LiveRegs::NZCV) {
543 RegSave.StatusFlags =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
545 .
addImm(AArch64SysReg::NZCV)
550 if (PhysLiveRegs & LiveRegs::W0) {
551 RegSave.X0Save =
MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
552 ? &AArch64::GPR64RegClass
553 : &AArch64::GPR32RegClass);
555 .
addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
560void MachineSMEABI::restorePhyRegSave(
const PhysRegSave &RegSave,
564 if (RegSave.StatusFlags != AArch64::NoRegister)
566 .
addImm(AArch64SysReg::NZCV)
567 .
addReg(RegSave.StatusFlags)
570 if (RegSave.X0Save != AArch64::NoRegister)
572 RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
576void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
582 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
586 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
590 .
addImm(AArch64SVCR::SVCRZA)
594 .
addImm(AArch64SysReg::TPIDR2_EL0);
605 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
608 .
addImm(AArch64SysReg::TPIDR2_EL0)
611 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
621 .
addImm(AArch64SysReg::TPIDR2_EL0)
626 .
addImm(AArch64SVCR::SVCRZA)
630void MachineSMEABI::emitAllocateLazySaveBuffer(
635 Register SP =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
636 Register SVL =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
643 if (Buffer == AArch64::NoRegister) {
651 "Lazy ZA save is not yet supported on Windows");
652 Buffer =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
673 "TPIDR2 block initialization is not supported on big-endian targets");
691 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
694 .
addImm(AArch64SysReg::TPIDR2_EL0);
703 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
708 .
addImm(AArch64SVCR::SVCRZA)
712void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
715 LiveRegs PhysLiveRegs,
bool IsSave) {
720 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
724 .
addReg(Context.getAgnosticZABufferPtr(*MF));
730 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
735 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
738void MachineSMEABI::emitAllocateFullZASaveBuffer(
746 Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
747 Register BufferSize =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
749 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
779 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
782void MachineSMEABI::emitStateChange(EmitContext &Context,
785 ZAState From, ZAState To,
788 if (From == ZAState::ANY || To == ZAState::ANY)
793 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
798 if (From == ZAState::CALLER_DORMANT) {
800 "CALLER_DORMANT state requires private ZA interface");
802 "CALLER_DORMANT state only valid in entry block");
803 emitNewZAPrologue(
MBB,
MBB.getFirstNonPHI());
804 if (To == ZAState::ACTIVE)
810 From = ZAState::ACTIVE;
813 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
814 emitZASave(Context,
MBB, InsertPt, PhysLiveRegs);
815 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
816 emitZARestore(Context,
MBB, InsertPt, PhysLiveRegs);
817 else if (To == ZAState::OFF) {
818 assert(From != ZAState::CALLER_DORMANT &&
819 "CALLER_DORMANT to OFF should have already been handled");
821 "Should not turn ZA off in agnostic ZA function");
822 emitZAOff(
MBB, InsertPt, From == ZAState::LOCAL_SAVED);
824 dbgs() <<
"Error: Transition from " << getZAStateString(From) <<
" to "
825 << getZAStateString(To) <<
'\n';
840 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
841 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
842 !SMEFnAttrs.hasAgnosticZAInterface())
845 assert(MF.getRegInfo().isSSA() &&
"Expected to be run on SSA form!");
849 TII = Subtarget->getInstrInfo();
850 TRI = Subtarget->getRegisterInfo();
851 MRI = &MF.getRegInfo();
854 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
856 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
860 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
862 if (Context.needsSaveBuffer()) {
863 if (FnInfo.AfterSMEProloguePt) {
867 emitAllocateZASaveBuffer(Context, *
MBBI->getParent(),
MBBI,
868 FnInfo.PhysLiveRegsAfterSMEPrologue);
871 emitAllocateZASaveBuffer(
873 FnInfo.Blocks[EntryBlock.
getNumber()].PhysLiveRegsAtEntry);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Register getEarlyAllocSMESaveBuffer() const
SMEAttrs getSMEFnAttrs() const
bool isTargetWindows() const
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the incoming (Out = false) or outgoing (Out = true) bundle number for basic block N.
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createMachineSMEABIPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
@ LLVM_MARK_AS_BITMASK_ENUM
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
This struct is a compact representation of a valid (non-zero power of two) alignment.