Go to the documentation of this file.
40 #define DEBUG_TYPE "loadstore-opt"
44 using namespace MIPatternMatch;
46 STATISTIC(NumStoresMerged,
"Number of stores merged");
65 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
87 Info.IsIndexSignExt =
false;
93 Info.Offset = RHSCst->Value.getSExtValue();
97 Info.IndexReg = PtrAddRHS;
98 Info.IsIndexSignExt =
false;
106 auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
107 auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
108 if (!LdSt1 || !LdSt2)
117 int64_t Size1 = LdSt1->getMemSize();
118 int64_t Size2 = LdSt2->getMemSize();
133 IsAlias = !(Size1 <= PtrDiff);
141 IsAlias = !((PtrDiff + Size2) <= 0);
153 if (!Base0Def || !Base1Def)
157 if (Base0Def->getOpcode() != Base1Def->getOpcode())
160 if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
164 if (Base0Def != Base1Def &&
174 if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
175 auto GV0 = Base0Def->getOperand(1).getGlobal();
176 auto GV1 = Base1Def->getOperand(1).getGlobal();
191 struct MemUseCharacteristics {
200 auto getCharacteristics =
202 if (
const auto *
LS = dyn_cast<GLoadStore>(
MI)) {
208 BaseReg =
LS->getPointerReg();
213 LS->getMMO().getMemoryType().getSizeInBytes());
214 return {
LS->isVolatile(),
LS->isAtomic(), BaseReg,
215 Offset , Size, &
LS->getMMO()};
224 MemUseCharacteristics MUC0 = getCharacteristics(&
MI),
225 MUC1 = getCharacteristics(&
Other);
228 if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
229 MUC0.Offset == MUC1.Offset)
233 if (MUC0.IsVolatile && MUC1.IsVolatile)
238 if (MUC0.IsAtomic && MUC1.IsAtomic)
243 if (MUC0.MMO && MUC1.MMO) {
244 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
245 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
256 if (!MUC0.MMO || !MUC1.MMO)
260 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
261 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
264 if (
AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
268 int64_t MinOffset =
std::min(SrcValOffset0, SrcValOffset1);
269 int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
270 int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
272 MUC0.MMO->getAAInfo()),
274 MUC1.MMO->getAAInfo())))
285 return MI.hasUnmodeledSideEffects() ||
MI.hasOrderedMemoryRef();
291 assert(StoresToMerge.size() > 1 &&
"Expected multiple stores to merge");
292 LLT OrigTy = MRI->
getType(StoresToMerge[0]->getValueReg());
293 LLT PtrTy = MRI->
getType(StoresToMerge[0]->getPointerReg());
296 initializeStoreMergeTargetInfo(AS);
297 const auto &LegalSizes = LegalStoreSizes[AS];
300 for (
auto StoreMI : StoresToMerge)
305 bool AnyMerged =
false;
310 unsigned MergeSizeBits;
311 for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
315 if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
323 unsigned NumStoresToMerge = MergeSizeBits / OrigTy.
getSizeInBits();
326 StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
327 AnyMerged |= doSingleStoreMerge(SingleMergeStores);
328 StoresToMerge.erase(StoresToMerge.begin(),
329 StoresToMerge.begin() + NumStoresToMerge);
330 }
while (StoresToMerge.size() > 1);
334 bool LoadStoreOpt::isLegalOrBeforeLegalizer(
const LegalityQuery &Query,
344 assert(Stores.size() > 1);
350 GStore *FirstStore = Stores[0];
351 const unsigned NumStores = Stores.size();
358 for (
unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
368 for (
auto Store : Stores) {
372 ConstantVals.
clear();
381 if (ConstantVals.empty()) {
388 assert(ConstantVals.size() == NumStores);
390 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
393 for (
unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
396 WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.
getSizeInBits());
403 NumStoresMerged += Stores.size();
410 R <<
"Merged " <<
NV(
"NumMerged", Stores.size()) <<
" stores of "
412 <<
" bytes into a single store of "
417 for (
auto MI : Stores)
418 InstsToErase.insert(
MI);
422 bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &
C) {
423 if (
C.Stores.size() < 2) {
428 LLVM_DEBUG(
dbgs() <<
"Checking store merge candidate with " <<
C.Stores.size()
429 <<
" stores, starting with " << *
C.Stores[0]);
443 auto DoesStoreAliasWithPotential = [&](
unsigned Idx,
GStore &CheckStore) {
444 for (
auto AliasInfo :
reverse(
C.PotentialAliases)) {
446 unsigned PreCheckedIdx = AliasInfo.second;
447 if (
static_cast<unsigned>(Idx) > PreCheckedIdx) {
466 for (
int StoreIdx =
C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
467 auto *CheckStore =
C.Stores[StoreIdx];
468 if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
474 <<
" stores remaining after alias checks. Merging...\n");
478 if (StoresToMerge.size() < 2)
480 return mergeStores(StoresToMerge);
483 bool LoadStoreOpt::operationAliasesWithCandidate(
MachineInstr &
MI,
484 StoreMergeCandidate &
C) {
485 if (
C.Stores.empty())
488 return instMayAlias(MI, *OtherMI, *MRI, AA);
492 void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(
MachineInstr &
MI) {
493 PotentialAliases.emplace_back(std::make_pair(&
MI, Stores.size() - 1));
496 bool LoadStoreOpt::addStoreToCandidate(
GStore &StoreMI,
497 StoreMergeCandidate &
C) {
521 if (
C.Stores.empty()) {
527 C.BasePtr = StoreBase;
528 C.CurrentLowestOffset = StoreOffCst;
529 C.Stores.emplace_back(&StoreMI);
530 LLVM_DEBUG(
dbgs() <<
"Starting a new merge candidate group with: "
546 if (
C.BasePtr != StoreBase)
553 C.Stores.emplace_back(&StoreMI);
560 bool Changed =
false;
562 StoreMergeCandidate Candidate;
564 if (InstsToErase.contains(&
MI))
567 if (
auto *StoreMI = dyn_cast<GStore>(&
MI)) {
570 if (!addStoreToCandidate(*StoreMI, Candidate)) {
573 if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
574 Changed |= processMergeCandidate(Candidate);
577 Candidate.addPotentialAlias(*StoreMI);
583 if (Candidate.Stores.empty())
588 Changed |= processMergeCandidate(Candidate);
589 Candidate.Stores.clear();
593 if (!
MI.mayLoadOrStore())
596 if (operationAliasesWithCandidate(
MI, Candidate)) {
599 Changed |= processMergeCandidate(Candidate);
605 Candidate.addPotentialAlias(
MI);
609 Changed |= processMergeCandidate(Candidate);
612 for (
auto *
MI : InstsToErase)
613 MI->eraseFromParent();
614 InstsToErase.clear();
619 bool Changed =
false;
620 for (
auto &
BB : MF) {
621 Changed |= mergeBlockStores(
BB);
626 void LoadStoreOpt::initializeStoreMergeTargetInfo(
unsigned AddrSpace) {
631 if (LegalStoreSizes.count(AddrSpace)) {
632 assert(LegalStoreSizes[AddrSpace].
any());
653 LegalSizes.set(Size);
655 assert(LegalSizes.any() &&
"Expected some store sizes to be legal!");
656 LegalStoreSizes[AddrSpace] = LegalSizes;
669 bool Changed =
false;
670 Changed |= mergeFunctionStores(MF);
672 LegalStoreSizes.clear();
Helper struct to store a base, index and offset that forms an address.
void setMF(MachineFunction &MF)
bool hasProperty(Property P) const
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
This is an optimization pass for GlobalISel generic memory operations.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
operand_type_match m_Reg()
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
ScalarTy getFixedSize() const
LegalizeAction Action
The action to take or the final answer.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Register getValueReg() const
Get the stored value register.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
The instances of the Type class are immutable: once they are created, they are never changed.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
A description of a memory reference used in the backend.
DiagnosticInfoOptimizationBase::Argument NV
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
static uint64_t getSizeOrUnknown(const TypeSize &T)
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", false, false) INITIALIZE_PASS_END(LoadStoreOpt
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
(vector float) vec_cmpeq(*A, *B) C
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
STATISTIC(NumFunctions, "Total number of functions")
Analysis containing CSE Info
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Register getPointerReg() const
Get the source register of the pointer value.
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Register getReg(unsigned Idx) const
Get the register for the operand index.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
unsigned getAddressSpace() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Generic memory optimizations
Representation of each machine instruction.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Module * getParent()
Get the module that this global value is contained inside of...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isInstHardMergeHazard(MachineInstr &MI)
Returns true if the instruction creates an unavoidable hazard that forces a boundary between store me...
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
print Print MemDeps of function
Class for arbitrary precision integers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const MachineBasicBlock * getParent() const
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, MachineRegisterInfo &MRI, AliasAnalysis *AA)
Returns true if the instruction MI may alias Other.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual const LegalizerInfo * getLegalizerInfo() const
Function & getFunction()
Return the LLVM function that this machine code represents.
ConstantMatch< APInt > m_ICst(APInt &Cst)
TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to represent the size in bits.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, bool &IsAlias, MachineRegisterInfo &MRI)
Compute whether or not a memory access at MI1 aliases with an access at MI2.
const unsigned MaxStoreSizeToForm
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
virtual const TargetLowering * getTargetLowering() const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
@ Unsupported
This operation is completely unsupported on the target.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
AnalysisUsage & addRequired()
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Representation for a specific memory location.
Optional< std::vector< StOtherPiece > > Other
reference emplace_back(ArgTypes &&... Args)