Go to the documentation of this file.
32 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
33 "AArch64 homogeneous prolog/epilog lowering pass"
37 cl::desc(
"The minimum number of instructions that are outlined in a frame "
38 "helper (default = 2)"));
42 class AArch64LowerHomogeneousPE {
72 class AArch64LowerHomogeneousPrologEpilog :
public ModulePass {
86 bool runOnModule(
Module &M)
override;
98 "aarch64-lower-homogeneous-prolog-epilog",
101 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(
Module &M) {
106 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
107 return AArch64LowerHomogeneousPE(&
M, MMI).run();
110 bool AArch64LowerHomogeneousPE::run() {
111 bool Changed =
false;
119 Changed |= runOnMachineFunction(*MF);
131 std::ostringstream RegStream;
134 RegStream <<
"OUTLINED_FUNCTION_PROLOG_";
137 RegStream <<
"OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset <<
"_";
140 RegStream <<
"OUTLINED_FUNCTION_EPILOG_";
143 RegStream <<
"OUTLINED_FUNCTION_EPILOG_TAIL_";
147 for (
auto Reg : Regs)
150 return RegStream.str();
160 assert(
F ==
nullptr &&
"Function has been created before");
163 assert(
F &&
"Function was null!");
171 F->addFnAttr(Attribute::OptimizeNone);
172 F->addFnAttr(Attribute::NoInline);
173 F->addFnAttr(Attribute::MinSize);
174 F->addFnAttr(Attribute::Naked);
199 int Offset,
bool IsPreDec) {
200 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
204 Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
206 Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
222 int Offset,
bool IsPostDec) {
223 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
227 Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
229 Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
274 unsigned FpOffset = 0) {
277 auto *
F =
M->getFunction(
Name);
291 auto LRIdx = std::distance(
292 Regs.begin(),
std::find(Regs.begin(), Regs.end(), AArch64::LR));
296 if (LRIdx !=
Size - 2) {
299 LRIdx -
Size + 2,
true);
303 for (
int I =
Size - 3;
I >= 0;
I -= 2) {
305 if (Regs[
I - 1] == AArch64::LR)
332 for (
int I = 0;
I <
Size - 2;
I += 2)
344 return M->getFunction(
Name);
359 auto RegCount = Regs.size();
360 assert(RegCount > 0 && (RegCount % 2 == 0));
362 int InstCount = RegCount / 2;
365 if (
std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end())
380 for (
auto NextMI = NextMBBI; NextMI !=
MBB.
end(); NextMI++) {
381 if (NextMI->readsRegister(AArch64::W16,
TRI))
386 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
393 if (NextMBBI ==
MBB.
end())
395 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
427 bool AArch64LowerHomogeneousPE::lowerEpilog(
435 for (
auto &MO :
MI.operands())
437 Regs.push_back(MO.getReg());
443 assert(
MI.getOpcode() == AArch64::HOM_Epilog);
448 auto *EpilogTailHelper =
456 NextMBBI = std::next(Return);
457 Return->removeFromParent();
469 for (
int I = 0;
I <
Size - 2;
I += 2)
501 bool AArch64LowerHomogeneousPE::lowerProlog(
511 for (
auto &MO :
MI.operands()) {
513 if (MO.getReg() == AArch64::LR)
515 Regs.push_back(MO.getReg());
516 }
else if (MO.isImm()) {
517 FpOffset = MO.getImm();
525 assert(
MI.getOpcode() == AArch64::HOM_Prolog);
552 for (
int I =
Size - 3;
I >= 0;
I -= 2)
577 unsigned Opcode =
MI.getOpcode();
581 case AArch64::HOM_Prolog:
582 return lowerProlog(
MBB,
MBBI, NextMBBI);
583 case AArch64::HOM_Epilog:
584 return lowerEpilog(
MBB,
MBBI, NextMBBI);
602 bool AArch64LowerHomogeneousPE::runOnMachineFunction(
MachineFunction &MF) {
612 return new AArch64LowerHomogeneousPrologEpilog();
ModulePass * createAArch64LowerHomogeneousPrologEpilogPass()
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual const TargetInstrInfo * getInstrInfo() const
return AArch64::GPR64RegClass.contains(Reg)
static const char * getRegisterName(unsigned RegNo, unsigned AltIdx=AArch64::NoRegAltName)
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
The instances of the Type class are immutable: once they are created, they are never changed.
static Function * getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, SmallVectorImpl< unsigned > &Regs, FrameHelperType Type, unsigned FpOffset=0)
Return a unique function if a helper can be formed with the given Regs and frame type.
void insert(iterator MBBI, MachineBasicBlock *MBB)
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &)
unsigned const TargetRegisterInfo * TRI
LLVM Basic Block Representation.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
unsigned getDefRegState(bool B)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
TargetInstrInfo - Interface to description of machine instruction set.
MachineBasicBlock * removeFromParent()
This method unlinks 'this' from the containing function, and returns it, but does not delete it.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Represent the analysis usage information of a pass.
static std::string getFrameHelperName(SmallVectorImpl< unsigned > &Regs, FrameHelperType Type, unsigned FpOffset)
Return a frame helper name with the given CSRs and the helper type.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const HexagonInstrInfo * TII
MachineFunctionProperties & set(Property P)
This class contains meta information specific to a module.
@ Define
Register definition.
void freezeReservedRegs(const MachineFunction &)
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
@ Implicit
Not emitted register (e.g. carry, or temporary result).
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
static bool shouldUseFrameHelper(MachineBasicBlock &MBB, MachineBasicBlock::iterator &NextMBBI, SmallVectorImpl< unsigned > &Regs, FrameHelperType Type)
This function checks if a frame helper should be used for HOM_Prolog/HOM_Epilog pseudo instruction ex...
Representation of each machine instruction.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
This is an important class for using LLVM in a threaded context.
initializer< Ty > init(const Ty &Val)
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
A Module instance is used to store all the information related to an LLVM module.
iterator_range< succ_iterator > successors()
StringRef - Represent a constant reference to a string, i.e.
MachineBasicBlock MachineBasicBlock::iterator MBBI
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineFunction & getOrCreateMachineFunction(Function &F)
Returns the MachineFunction constructed for the IR function F.
MachineFunctionProperties & reset(Property P)
TargetSubtargetInfo - Generic base class for all target subtargets.
void setPreservesAll()
Set by analyses that do not transform their input at all.
@ ExternalLinkage
Externally visible function.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
static Type * getVoidTy(LLVMContext &C)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MachineFunction & createFrameHelperMachineFunction(Module *M, MachineModuleInfo *MMI, StringRef Name)
Create a Function for the unique frame helper with the given name.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
cl::opt< int > FrameHelperSizeThreshold("frame-helper-size-threshold", cl::init(2), cl::Hidden, cl::desc("The minimum number of instructions that are outlined in a frame " "helper (default = 2)"))
AnalysisUsage & addRequired()
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, "aarch64-lower-homogeneous-prolog-epilog", AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) bool AArch64LowerHomogeneousPrologEpilog
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.