40#define DEBUG_TYPE "loadstore-opt"
44using namespace MIPatternMatch;
46STATISTIC(NumStoresMerged,
"Number of stores merged");
65 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
88 Info.IsIndexSignExt =
false;
94 Info.Offset = RHSCst->Value.getSExtValue();
98 Info.IndexReg = PtrAddRHS;
99 Info.IsIndexSignExt =
false;
107 auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
108 auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
109 if (!LdSt1 || !LdSt2)
118 int64_t Size1 = LdSt1->getMemSize();
119 int64_t Size2 = LdSt2->getMemSize();
134 IsAlias = !(Size1 <= PtrDiff);
142 IsAlias = !((PtrDiff + Size2) <= 0);
154 if (!Base0Def || !Base1Def)
158 if (Base0Def->getOpcode() != Base1Def->getOpcode())
161 if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
165 if (Base0Def != Base1Def &&
175 if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
176 auto GV0 = Base0Def->getOperand(1).getGlobal();
177 auto GV1 = Base1Def->getOperand(1).getGlobal();
192 struct MemUseCharacteristics {
201 auto getCharacteristics =
203 if (
const auto *LS = dyn_cast<GLoadStore>(
MI)) {
209 BaseReg = LS->getPointerReg();
214 LS->getMMO().getMemoryType().getSizeInBytes());
215 return {LS->isVolatile(), LS->isAtomic(), BaseReg,
225 MemUseCharacteristics MUC0 = getCharacteristics(&
MI),
226 MUC1 = getCharacteristics(&
Other);
229 if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
230 MUC0.Offset == MUC1.Offset)
234 if (MUC0.IsVolatile && MUC1.IsVolatile)
239 if (MUC0.IsAtomic && MUC1.IsAtomic)
244 if (MUC0.MMO && MUC1.MMO) {
245 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
246 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
257 if (!MUC0.MMO || !MUC1.MMO)
261 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
262 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
265 if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
269 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
270 int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
271 int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
273 MUC0.MMO->getAAInfo()),
275 MUC1.MMO->getAAInfo())))
286 return MI.hasUnmodeledSideEffects() ||
MI.hasOrderedMemoryRef();
292 assert(StoresToMerge.
size() > 1 &&
"Expected multiple stores to merge");
293 LLT OrigTy = MRI->
getType(StoresToMerge[0]->getValueReg());
294 LLT PtrTy = MRI->
getType(StoresToMerge[0]->getPointerReg());
297 initializeStoreMergeTargetInfo(AS);
298 const auto &LegalSizes = LegalStoreSizes[AS];
301 for (
auto *StoreMI : StoresToMerge)
306 bool AnyMerged =
false;
311 unsigned MergeSizeBits;
312 for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
316 if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
324 unsigned NumStoresToMerge = MergeSizeBits / OrigTy.
getSizeInBits();
327 StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
328 AnyMerged |= doSingleStoreMerge(SingleMergeStores);
329 StoresToMerge.erase(StoresToMerge.begin(),
330 StoresToMerge.begin() + NumStoresToMerge);
331 }
while (StoresToMerge.size() > 1);
335bool LoadStoreOpt::isLegalOrBeforeLegalizer(
const LegalityQuery &Query,
341 return IsPreLegalizer ||
Action == LegalizeAction::Legal;
351 GStore *FirstStore = Stores[0];
352 const unsigned NumStores = Stores.
size();
369 for (
auto *Store : Stores) {
373 ConstantVals.
clear();
382 if (ConstantVals.
empty()) {
391 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
404 <<
" stores into merged store: " << *NewStore);
406 NumStoresMerged += Stores.size();
413 R <<
"Merged " <<
NV(
"NumMerged", Stores.size()) <<
" stores of "
415 <<
" bytes into a single store of "
420 for (
auto *
MI : Stores)
421 InstsToErase.insert(
MI);
425bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &
C) {
426 if (
C.Stores.size() < 2) {
431 LLVM_DEBUG(
dbgs() <<
"Checking store merge candidate with " <<
C.Stores.size()
432 <<
" stores, starting with " << *
C.Stores[0]);
446 auto DoesStoreAliasWithPotential = [&](
unsigned Idx,
GStore &CheckStore) {
447 for (
auto AliasInfo :
reverse(
C.PotentialAliases)) {
449 unsigned PreCheckedIdx = AliasInfo.second;
450 if (
static_cast<unsigned>(
Idx) < PreCheckedIdx) {
468 for (
int StoreIdx =
C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
469 auto *CheckStore =
C.Stores[StoreIdx];
470 if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
476 <<
" stores remaining after alias checks. Merging...\n");
480 if (StoresToMerge.
size() < 2)
482 return mergeStores(StoresToMerge);
486 StoreMergeCandidate &
C) {
487 if (
C.Stores.empty())
490 return instMayAlias(MI, *OtherMI, *MRI, AA);
494void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(
MachineInstr &
MI) {
495 PotentialAliases.emplace_back(std::make_pair(&
MI, Stores.size() - 1));
498bool LoadStoreOpt::addStoreToCandidate(
GStore &StoreMI,
499 StoreMergeCandidate &
C) {
523 if (
C.Stores.empty()) {
529 C.BasePtr = StoreBase;
530 C.CurrentLowestOffset = StoreOffCst;
531 C.Stores.emplace_back(&StoreMI);
532 LLVM_DEBUG(
dbgs() <<
"Starting a new merge candidate group with: "
548 if (
C.BasePtr != StoreBase)
555 C.Stores.emplace_back(&StoreMI);
562 bool Changed =
false;
564 StoreMergeCandidate Candidate;
566 if (InstsToErase.contains(&
MI))
569 if (
auto *StoreMI = dyn_cast<GStore>(&
MI)) {
572 if (!addStoreToCandidate(*StoreMI, Candidate)) {
575 if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
576 Changed |= processMergeCandidate(Candidate);
579 Candidate.addPotentialAlias(*StoreMI);
585 if (Candidate.Stores.empty())
590 Changed |= processMergeCandidate(Candidate);
591 Candidate.Stores.clear();
595 if (!
MI.mayLoadOrStore())
598 if (operationAliasesWithCandidate(
MI, Candidate)) {
601 Changed |= processMergeCandidate(Candidate);
607 Candidate.addPotentialAlias(
MI);
611 Changed |= processMergeCandidate(Candidate);
614 for (
auto *
MI : InstsToErase)
615 MI->eraseFromParent();
616 InstsToErase.clear();
621 bool Changed =
false;
622 for (
auto &BB : MF) {
623 Changed |= mergeBlockStores(BB);
628void LoadStoreOpt::initializeStoreMergeTargetInfo(
unsigned AddrSpace) {
633 if (LegalStoreSizes.count(AddrSpace)) {
634 assert(LegalStoreSizes[AddrSpace].
any());
655 LegalSizes.set(
Size);
657 assert(LegalSizes.any() &&
"Expected some store sizes to be legal!");
658 LegalStoreSizes[AddrSpace] = LegalSizes;
671 bool Changed =
false;
672 Changed |= mergeFunctionStores(MF);
674 LegalStoreSizes.clear();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Interface for Targets to specify which operations they can successfully select and how the others sho...
Generic memory optimizations
const unsigned MaxStoreSizeToForm
static bool isInstHardMergeHazard(MachineInstr &MI)
Returns true if the instruction creates an unavoidable hazard that forces a boundary between store me...
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
print Print MemDeps of function
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Class for arbitrary precision integers.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getValueReg() const
Get the stored value register.
Module * getParent()
Get the module that this global value is contained inside of...
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool hasProperty(Property P) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
void setMF(MachineFunction &MF)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
A description of a memory reference used in the backend.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Representation for a specific memory location.
static uint64_t getSizeOrUnknown(const TypeSize &T)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Wrapper class representing virtual and physical registers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual const LegalizerInfo * getLegalizerInfo() const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
constexpr ScalarTy getFixedValue() const
@ C
The default llvm calling convention, compatible with C.
bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, bool &IsAlias, MachineRegisterInfo &MRI)
Compute whether or not a memory access at MI1 aliases with an access at MI2.
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, MachineRegisterInfo &MRI, AliasAnalysis *AA)
Returns true if the instruction MI may alias Other.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ Unsupported
This operation is completely unsupported on the target.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Helper struct to store a base, index and offset that forms an address.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.