#define DEBUG_TYPE "x86-fast-pre-tile-config"
class X86FastPreTileConfigImpl {
public:
  X86FastPreTileConfigImpl() : StackSlotForVirtReg(-1) {}
  bool runOnMachineFunction(MachineFunction &MF);

private:
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  // Row, column and spill-slot address recorded for each converted tile PHI.
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  // Maps each virtual register to the frame index of its spill slot.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  // Set for each tile virtual register that was determined to be live across
  // blocks (or across a tile reconfiguration).
  BitVector MayLiveAcrossBlocks;
  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register OrigReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);
};
class X86FastPreTileConfigLegacy : public MachineFunctionPass {
public:
  static char ID;

  X86FastPreTileConfigLegacy() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  bool runOnMachineFunction(MachineFunction &MFunc) override;
};
char X86FastPreTileConfigLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfigLegacy, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfigLegacy, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)
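// Return true if instruction A is executed before instruction B in MBB (with
// B == MBB.end() counting as "after everything"). Used to check whether a
// tile use is already covered by the last inserted configuration.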
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}
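// Return the stack slot used to spill \p VirtReg, creating a new spill slot
// with the register class' spill size and alignment on first use.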
int X86FastPreTileConfigImpl::getStackSpaceFor(Register VirtReg) {
  // Find the spill slot to use, possibly creating it lazily.
  int SS = StackSlotForVirtReg[VirtReg];
  if (SS != -1)
    return SS;

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}
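// Return true if \p VirtReg may still be used after the tile configuration
// \p CfgMI, i.e. it is used in another block or by an instruction that the
// configuration precedes; such tile registers must be spilled and reloaded.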
bool X86FastPreTileConfigImpl::mayLiveOut(Register VirtReg,
                                          MachineInstr *CfgMI) {
  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
      return true;
    }
    // The use is in the same block: if it follows the last configuration,
    // the tile register is clobbered by the reconfiguration.
    if (CfgMI && dominates(*MBB, *CfgMI, UseInst)) {
      MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
      return true;
    }
  }

  return false;
}
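// Zero-initialize the stack slot that backs the tile configuration, using the
// widest vector stores available on the subtarget (ZMM, YMM or XMM).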
void X86FastPreTileConfigImpl::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    // ... zero Zmm and store it to the tile config slot ...
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    // ... zero Ymm and store it twice to cover the 64-byte slot ...
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    // ... zero Xmm and store it four times to cover the slot ...
  }
  // ...
}
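// Spill the tile register \p VirtReg to its stack slot, inserting the store
// at \p Before (normally right after the tile def).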
void X86FastPreTileConfigImpl::spill(MachineBasicBlock::iterator Before,
                                     Register VirtReg, bool Kill) {
  int FI = getStackSpaceFor(VirtReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // The store itself is emitted with TII->storeRegToStackSlot(); no shape
  // information is needed because the store is placed right next to the tile
  // def, where the shape registers are still live.
  // ...
}
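// Reload the tile register \p OrigReg from its spill slot before \p UseMI,
// rewriting the use to a new virtual tile register defined by PTILELOADDV
// with the original row/col shape operands.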
void X86FastPreTileConfigImpl::reload(MachineBasicBlock::iterator UseMI,
                                      Register OrigReg, MachineOperand *RowMO,
                                      MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // ...
  TileReg = MRI->createVirtualRegister(&RC);
  // The reload is a PTILELOADDV, which takes the row/col shape, the slot
  // address and a 64-byte stride.
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
          TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  // ...
}
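// Static helpers: identify tile registers and tile-defining instructions, and
// recover the row/col shape of a tile virtual register from its def chain.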
static bool isTileRegister(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual() &&
      (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)) {
    return true;
  }

  if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
    return true;

  return false;
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  // ...
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    // The row/col operands of the tile def carry the shape.
    // ...
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // If the def is neither a tile def nor a copy, it must be a PHI.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  // ...
}
void X86FastPreTileConfigImpl::convertPHI(MachineBasicBlock *MBB,
                                          MachineInstr &PHI) {
  // Build PHIs for the spill-slot address and for the row/col shape.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the new registers so that recursive conversion can reuse them.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // The incoming tile register and its incoming block.
    Register InTileReg = PHI.getOperand(I).getReg();
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    if (TileDefMI->isPHI()) {
      if (auto It = VisitedPHIs.find(TileDefMI);
          It != VisitedPHIs.end()) {
        // The incoming PHI was already converted: reuse its recorded
        // spill-slot address (and row/col registers).
        Register InStackAddrReg = It->second.StackAddr;
        // ...
        continue;
      }
      // Convert the incoming PHI first; afterwards the incoming tile is
      // defined by a tile load that provides its address and shape.
      convertPHI(TileDefMI->getParent(), *TileDefMI);
      MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
      // ...
    } else {
      // The incoming tile is defined by a regular tile def; it will be
      // spilled, so take the address of its spill slot with LEA64r.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      // ... BuildMI(..., TII->get(X86::LEA64r), InStackAddrReg)
      //         .addFrameIndex(FI) ...
    }
  }

  // Reload the merged tile from the PHI'ed stack address with the PHI'ed
  // shape, then delete the original tile PHI.
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // ...
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}
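// If a tile PHI has an incoming value from this block that is itself defined
// by another PHI of the block, rewrite the operand to the value that PHI
// receives from this block, so convertPHI never sees such chained operands.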
void X86FastPreTileConfigImpl::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  // Collect the tile PHIs of this block.
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }

  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find an operand that is incoming from this block and whose def is
    // another PHI.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;
      InMO = &PHI->getOperand(I);
      break;
    }
    // No such operand: nothing to canonicalize.
    if (!InMO)
      continue;

    // Break the PHI-to-PHI dependency by forwarding the value the defining
    // PHI receives from this block.
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      InMO->setReg(DefMI->getOperand(I).getReg());
      break;
    }
  }
}
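// Convert all tile PHIs of the block. Collect them first because convertPHI
// inserts and erases instructions while iterating.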
void X86FastPreTileConfigImpl::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    convertPHI(&MBB, *MI);
  }
}
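// Insert tile configuration points into MBB so that every tile def and use is
// preceded by a configuration whose shape operands are available, spilling
// and reloading tile registers that a reconfiguration would clobber. Returns
// true if the block was changed.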
bool X86FastPreTileConfigImpl::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  // Emit a tile configuration right before `Before`, remember it in
  // LastTileCfg and restart shape tracking.
  auto Config = [&](MachineInstr &Before) {
    // ... (create the config stack slot if needed and emit the ldtilecfg
    //      pseudo, recording it in LastTileCfg) ...
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isReg() && isTileRegister(MRI, MO.getReg()))
        return true;
    }
    return false;
  };

  // Walk the block bottom-up so a configuration is inserted before the first
  // tile use that follows the defs of its shapes.
  for (MachineInstr &MI : reverse(MBB)) {
    // PHIs have already been converted.
    if (MI.isPHI())
      break;

    if (!isTileDef(MRI, MI)) {
      if (HasTileOperand(MRI, MI))
        HasUnconfigTile = true;
      // A call clobbers the tile configuration, so the tile users following
      // the call need a configuration of their own.
      if (MI.isCall() && HasUnconfigTile) {
        auto UseIt = MI.getIterator();
        while (UseIt != MBB.end()) {
          if (HasTileOperand(MRI, *UseIt))
            break;
          ++UseIt;
        }
        // ...
        Config(*UseIt);
        HasUnconfigTile = false;
      }
      continue;
    }

    // A tile def: its row/col operands carry the shape.
    Register TileReg = MI.getOperand(0).getReg();
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // ...
    // If the tile may be clobbered by a reconfiguration between its def and a
    // use, spill it right after the def and reload it before each use.
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // Only reload when the use is separated from the def by a tile
        // configuration.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // PHI uses are handled by convertPHI; reload before any other
        // cross-block use.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Some tile users were seen without a configuration below them: insert one
  // at the top of the block, after the PHIs or after the last shape def.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}
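// Entry point of the pass: collect the target hooks, size the bookkeeping
// structures, canonicalize and convert the tile PHIs, then configure each
// basic block in reverse post order so shapes are always defined before the
// blocks that use them.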
bool X86FastPreTileConfigImpl::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // ...
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();

  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // New virtual registers are created during configuration; over-allocate the
  // bit vector so their indices stay in range.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;

  // Canonicalize the PHI nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Walk the blocks in reverse post order so that tile shapes are defined
  // before the blocks that consume them.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigLegacyPass() {
  return new X86FastPreTileConfigLegacy();
}
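// Legacy pass-manager wrapper: delegate to the shared implementation.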
bool X86FastPreTileConfigLegacy::runOnMachineFunction(MachineFunction &MF) {
  X86FastPreTileConfigImpl Impl;
  return Impl.runOnMachineFunction(MF);
}
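// New pass-manager entry point: run the shared implementation and report
// which analyses are preserved.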
PreservedAnalyses
X86FastPreTileConfigPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  X86FastPreTileConfigImpl Impl;
  bool Changed = Impl.runOnMachineFunction(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  return getMachineFunctionPassPreservedAnalyses();
}