LLVM 22.0.0git
RISCVLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Load/Store Pairing: It identifies pairs of load or store instructions
10// operating on consecutive memory locations and merges them into a single
11// paired instruction, leveraging hardware support for paired memory accesses.
12// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
13//
14// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
15// merging zero store instructions, promoting loads that read directly from a
16// preceding store, and merging base register updates with load/store
17// instructions (via pre-/post-indexed addressing). These advanced
18// transformations are not yet implemented in the RISC-V pass but represent
19// potential future enhancements for further optimizing RISC-V memory
20// operations.
21//
22//===----------------------------------------------------------------------===//
23
24#include "RISCV.h"
25#include "RISCVTargetMachine.h"
27#include "llvm/CodeGen/Passes.h"
29#include "llvm/Support/Debug.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "riscv-load-store-opt"
35#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
36
37// The LdStLimit limits number of instructions how far we search for load/store
38// pairs.
39static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
41
42namespace {
43
44struct RISCVLoadStoreOpt : public MachineFunctionPass {
45 static char ID;
46 bool runOnMachineFunction(MachineFunction &Fn) override;
47
48 RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
49
50 MachineFunctionProperties getRequiredProperties() const override {
51 return MachineFunctionProperties().setNoVRegs();
52 }
53
54 void getAnalysisUsage(AnalysisUsage &AU) const override {
55 AU.addRequired<AAResultsWrapperPass>();
57 }
58
59 StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
60
61 // Find and pair load/store instructions.
62 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
63
64 // Convert load/store pairs to single instructions.
65 bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
67
68 // Scan the instructions looking for a load/store that can be combined
69 // with the current instruction into a load/store pair.
70 // Return the matching instruction if one is found, else MBB->end().
72 bool &MergeForward);
73
75 mergePairedInsns(MachineBasicBlock::iterator I,
76 MachineBasicBlock::iterator Paired, bool MergeForward);
77
78private:
79 AliasAnalysis *AA;
80 MachineRegisterInfo *MRI;
81 const RISCVInstrInfo *TII;
82 const RISCVRegisterInfo *TRI;
83 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
84};
85} // end anonymous namespace
86
87char RISCVLoadStoreOpt::ID = 0;
89 false)
90
91bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
92 if (skipFunction(Fn.getFunction()))
93 return false;
94 const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
95 if (!Subtarget.useLoadStorePairs())
96 return false;
97
98 bool MadeChange = false;
99 TII = Subtarget.getInstrInfo();
100 TRI = Subtarget.getRegisterInfo();
101 MRI = &Fn.getRegInfo();
102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
103 ModifiedRegUnits.init(*TRI);
104 UsedRegUnits.init(*TRI);
105
106 for (MachineBasicBlock &MBB : Fn) {
107 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
108
109 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
110 MBBI != E;) {
111 if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
112 tryToPairLdStInst(MBBI))
113 MadeChange = true;
114 else
115 ++MBBI;
116 }
117 }
118 return MadeChange;
119}
120
121// Find loads and stores that can be merged into a single load or store pair
122// instruction.
123bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
124 MachineInstr &MI = *MBBI;
125
126 // If this is volatile, it is not a candidate.
127 if (MI.hasOrderedMemoryRef())
128 return false;
129
130 if (!TII->isLdStSafeToPair(MI, TRI))
131 return false;
132
133 // Look ahead for a pairable instruction.
134 MachineBasicBlock::iterator E = MI.getParent()->end();
135 bool MergeForward;
136 MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
137 if (Paired != E) {
138 MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
139 return true;
140 }
141 return false;
142}
143
144// Merge two adjacent load/store instructions into a paired instruction
145// (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of
146// SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the
147// appropriate paired opcode, verifies that the memory operand is properly
148// aligned, and checks that the offset is valid. If all conditions are met, it
149// builds and inserts the paired instruction.
150bool RISCVLoadStoreOpt::tryConvertToLdStPair(
152 unsigned PairOpc;
153 Align RequiredAlignment;
154 switch (First->getOpcode()) {
155 default:
156 llvm_unreachable("Unsupported load/store instruction for pairing");
157 case RISCV::SW:
158 PairOpc = RISCV::MIPS_SWP;
159 RequiredAlignment = Align(8);
160 break;
161 case RISCV::LW:
162 PairOpc = RISCV::MIPS_LWP;
163 RequiredAlignment = Align(8);
164 break;
165 case RISCV::SD:
166 PairOpc = RISCV::MIPS_SDP;
167 RequiredAlignment = Align(16);
168 break;
169 case RISCV::LD:
170 PairOpc = RISCV::MIPS_LDP;
171 RequiredAlignment = Align(16);
172 break;
173 }
174
175 MachineFunction *MF = First->getMF();
176 const MachineMemOperand *MMO = *First->memoperands_begin();
177 Align MMOAlign = MMO->getAlign();
178
179 if (MMOAlign < RequiredAlignment)
180 return false;
181
182 int64_t Offset = First->getOperand(2).getImm();
183 if (!isUInt<7>(Offset))
184 return false;
185
186 MachineInstrBuilder MIB = BuildMI(
187 *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
188 TII->get(PairOpc));
189 MIB.add(First->getOperand(0))
190 .add(Second->getOperand(0))
191 .add(First->getOperand(1))
192 .add(First->getOperand(2))
193 .cloneMergedMemRefs({&*First, &*Second});
194
195 First->getParent()->insert(First, MIB);
196
197 First->removeFromParent();
198 Second->removeFromParent();
199
200 return true;
201}
202
203static bool mayAlias(MachineInstr &MIa,
205 AliasAnalysis *AA) {
206 for (MachineInstr *MIb : MemInsns)
207 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
208 return true;
209
210 return false;
211}
212
213// Scan the instructions looking for a load/store that can be combined with the
214// current instruction into a wider equivalent or a load/store pair.
215// TODO: Extend pairing logic to consider reordering both instructions
216// to a safe "middle" position rather than only merging forward/backward.
217// This requires more sophisticated checks for aliasing, register
218// liveness, and potential scheduling hazards.
220RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
221 bool &MergeForward) {
222 MachineBasicBlock::iterator E = I->getParent()->end();
224 MachineInstr &FirstMI = *I;
225 MBBI = next_nodbg(MBBI, E);
226
227 bool MayLoad = FirstMI.mayLoad();
228 Register Reg = FirstMI.getOperand(0).getReg();
229 Register BaseReg = FirstMI.getOperand(1).getReg();
230 int64_t Offset = FirstMI.getOperand(2).getImm();
231 int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
232
233 MergeForward = false;
234
235 // Track which register units have been modified and used between the first
236 // insn (inclusive) and the second insn.
237 ModifiedRegUnits.clear();
238 UsedRegUnits.clear();
239
240 // Remember any instructions that read/write memory between FirstMI and MI.
241 SmallVector<MachineInstr *, 4> MemInsns;
242
243 for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
244 MBBI = next_nodbg(MBBI, E)) {
245 MachineInstr &MI = *MBBI;
246
247 // Don't count transient instructions towards the search limit since there
248 // may be different numbers of them if e.g. debug information is present.
249 if (!MI.isTransient())
250 ++Count;
251
252 if (MI.getOpcode() == FirstMI.getOpcode() &&
253 TII->isLdStSafeToPair(MI, TRI)) {
254 Register MIBaseReg = MI.getOperand(1).getReg();
255 int64_t MIOffset = MI.getOperand(2).getImm();
256
257 if (BaseReg == MIBaseReg) {
258 if ((Offset != MIOffset + OffsetStride) &&
259 (Offset + OffsetStride != MIOffset)) {
260 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
261 TRI);
262 MemInsns.push_back(&MI);
263 continue;
264 }
265
266 // If the destination register of one load is the same register or a
267 // sub/super register of the other load, bail and keep looking.
268 if (MayLoad &&
269 TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
270 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
271 TRI);
272 MemInsns.push_back(&MI);
273 continue;
274 }
275
276 // If the BaseReg has been modified, then we cannot do the optimization.
277 if (!ModifiedRegUnits.available(BaseReg))
278 return E;
279
280 // If the Rt of the second instruction was not modified or used between
281 // the two instructions and none of the instructions between the second
282 // and first alias with the second, we can combine the second into the
283 // first.
284 if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
285 !(MI.mayLoad() &&
286 !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
287 !mayAlias(MI, MemInsns, AA)) {
288
289 MergeForward = false;
290 return MBBI;
291 }
292
293 // Likewise, if the Rt of the first instruction is not modified or used
294 // between the two instructions and none of the instructions between the
295 // first and the second alias with the first, we can combine the first
296 // into the second.
297 if (!(MayLoad &&
298 !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
299 !mayAlias(FirstMI, MemInsns, AA)) {
300
301 if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
302 MergeForward = true;
303 return MBBI;
304 }
305 }
306 // Unable to combine these instructions due to interference in between.
307 // Keep looking.
308 }
309 }
310
311 // If the instruction wasn't a matching load or store. Stop searching if we
312 // encounter a call instruction that might modify memory.
313 if (MI.isCall())
314 return E;
315
316 // Update modified / uses register units.
317 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
318
319 // Otherwise, if the base register is modified, we have no match, so
320 // return early.
321 if (!ModifiedRegUnits.available(BaseReg))
322 return E;
323
324 // Update list of instructions that read/write memory.
325 if (MI.mayLoadOrStore())
326 MemInsns.push_back(&MI);
327 }
328 return E;
329}
330
332RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
334 bool MergeForward) {
335 MachineBasicBlock::iterator E = I->getParent()->end();
337 // If NextI is the second of the two instructions to be merged, we need
338 // to skip one further. Either way we merge will invalidate the iterator,
339 // and we don't need to scan the new instruction, as it's a pairwise
340 // instruction, which we're not considering for further action anyway.
341 if (NextI == Paired)
342 NextI = next_nodbg(NextI, E);
343
344 // Insert our new paired instruction after whichever of the paired
345 // instructions MergeForward indicates.
346 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
347 MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
348 int Offset = I->getOperand(2).getImm();
349 int PairedOffset = Paired->getOperand(2).getImm();
350 bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
351
352 if (!MergeForward)
353 Paired->getOperand(1).setIsKill(false);
354
355 // Kill flags may become invalid when moving stores for pairing.
356 if (I->getOperand(0).isUse()) {
357 if (!MergeForward) {
358 // Check if the Paired store's source register has a kill flag and clear
359 // it only if there are intermediate uses between I and Paired.
360 MachineOperand &PairedRegOp = Paired->getOperand(0);
361 if (PairedRegOp.isKill()) {
362 for (auto It = std::next(I); It != Paired; ++It) {
363 if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
364 PairedRegOp.setIsKill(false);
365 break;
366 }
367 }
368 }
369 } else {
370 // Clear kill flags of the first store's register in the forward
371 // direction.
372 Register Reg = I->getOperand(0).getReg();
373 for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
374 MI.clearRegisterKills(Reg, TRI);
375 }
376 }
377
378 MachineInstr *ToInsert = DeletionPoint->removeFromParent();
379 MachineBasicBlock &MBB = *InsertionPoint->getParent();
381
382 if (!InsertAfter) {
383 First = MBB.insert(InsertionPoint, ToInsert);
384 Second = InsertionPoint;
385 } else {
386 Second = MBB.insertAfter(InsertionPoint, ToInsert);
387 First = InsertionPoint;
388 }
389
390 if (tryConvertToLdStPair(First, Second)) {
391 LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
392 LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
393 }
394
395 return NextI;
396}
397
398// Returns an instance of the Load / Store Optimization pass.
400 return new RISCVLoadStoreOpt();
401}
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define RISCV_LOAD_STORE_OPT_NAME
static cl::opt< unsigned > LdStLimit("riscv-load-store-scan-limit", cl::init(128), cl::Hidden)
#define LLVM_DEBUG(...)
Definition Debug.h:119
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void clear()
Clears the set.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
int64_t getImm() const
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
FunctionPass * createRISCVLoadStoreOptPass()
@ Offset
Definition DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.