LLVM 22.0.0git
RISCVLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Load/Store Pairing: It identifies pairs of load or store instructions
10// operating on consecutive memory locations and merges them into a single
11// paired instruction, leveraging hardware support for paired memory accesses.
12// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
13//
14// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
15// register allocation didn't provide suitable consecutive registers.
16//
17// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
18// merging zero store instructions, promoting loads that read directly from a
19// preceding store, and merging base register updates with load/store
20// instructions (via pre-/post-indexed addressing). These advanced
21// transformations are not yet implemented in the RISC-V pass but represent
22// potential future enhancements for further optimizing RISC-V memory
23// operations.
24//
25//===----------------------------------------------------------------------===//
26
27#include "RISCV.h"
28#include "RISCVTargetMachine.h"
30#include "llvm/ADT/Statistic.h"
32#include "llvm/CodeGen/Passes.h"
34#include "llvm/Support/Debug.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "riscv-load-store-opt"
40#define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer"
41
42// The LdStLimit limits number of instructions how far we search for load/store
43// pairs.
44static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
46STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
47STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
48
49namespace {
50
// Machine-function pass implementing the two transformations described in the
// file header: load/store pairing and the post-allocation Zilsd fix-up.
// NOTE(review): this listing is missing original lines 63, 73, 75-76, 79-80,
// 85, 88 and 94, so several declarations below appear truncated.
51struct RISCVLoadStoreOpt : public MachineFunctionPass {
52 static char ID;
53 bool runOnMachineFunction(MachineFunction &Fn) override;
54
55 RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
56
// The pass declares the NoVRegs property as a requirement.
57 MachineFunctionProperties getRequiredProperties() const override {
58 return MachineFunctionProperties().setNoVRegs();
59 }
60
// Alias analysis is required; runOnMachineFunction fetches it into AA.
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
62 AU.addRequired<AAResultsWrapperPass>();
64 }
65
66 StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
67
68 // Find and pair load/store instructions.
69 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
70
71 // Convert load/store pairs to single instructions.
72 bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
74 bool tryConvertToXqcilsmLdStPair(MachineFunction *MF,
77 bool tryConvertToXqcilsmMultiLdSt(MachineBasicBlock::iterator &First);
78 bool tryConvertToMIPSLdStPair(MachineFunction *MF,
81
82 // Scan the instructions looking for a load/store that can be combined
83 // with the current instruction into a load/store pair.
84 // Return the matching instruction if one is found, else MBB->end().
86 bool &MergeForward);
87
89 mergePairedInsns(MachineBasicBlock::iterator I,
90 MachineBasicBlock::iterator Paired, bool MergeForward);
91
92 // Post reg-alloc zilsd part
93 bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
95 bool isValidZilsdRegPair(Register First, Register Second);
96 void splitLdSdIntoTwo(MachineBasicBlock &MBB,
97 MachineBasicBlock::iterator &MBBI, bool IsLoad);
98
99private:
// Per-function state; every member below is (re)assigned or (re)initialized
// at the start of runOnMachineFunction.
100 AliasAnalysis *AA;
101 MachineRegisterInfo *MRI;
102 const RISCVInstrInfo *TII;
103 const RISCVRegisterInfo *TRI;
104 const RISCVSubtarget *STI = nullptr;
// Scratch sets cleared and refilled by each findMatchingInsn scan.
105 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
106};
107} // end anonymous namespace
108
// Definition of the pass's static ID member; the constructor passes ID to
// MachineFunctionPass.
// NOTE(review): original line 110 is missing from this listing, so the
// "false)" fragment below is only the tail of a statement that began on that
// line (presumably the pass-registration macro call — confirm against the
// upstream file).
109char RISCVLoadStoreOpt::ID = 0;
111 false)
112
113bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
114 if (skipFunction(Fn.getFunction()))
115 return false;
116
117 bool MadeChange = false;
118 STI = &Fn.getSubtarget<RISCVSubtarget>();
119 TII = STI->getInstrInfo();
120 TRI = STI->getRegisterInfo();
121 MRI = &Fn.getRegInfo();
122 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
123 ModifiedRegUnits.init(*TRI);
124 UsedRegUnits.init(*TRI);
125
126 if (STI->useMIPSLoadStorePairs() || STI->hasVendorXqcilsm()) {
127 for (MachineBasicBlock &MBB : Fn) {
128 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
129
130 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
131 MBBI != E;) {
132 if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
133 tryToPairLdStInst(MBBI))
134 MadeChange = true;
135 else
136 ++MBBI;
137 }
138 }
139 }
140
141 if (!STI->is64Bit() && STI->hasStdExtZilsd()) {
142 for (auto &MBB : Fn) {
143 for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
144 if (fixInvalidRegPairOp(MBB, MBBI)) {
145 MadeChange = true;
146 // Iterator was updated by fixInvalidRegPairOp
147 } else {
148 ++MBBI;
149 }
150 }
151 }
152 }
153
154 return MadeChange;
155}
156
157// Find loads and stores that can be merged into a single load or store pair
158// instruction.
159bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
160 MachineInstr &MI = *MBBI;
161
162 // If this is volatile, it is not a candidate.
163 if (MI.hasOrderedMemoryRef())
164 return false;
165
166 if (!TII->isLdStSafeToPair(MI, TRI))
167 return false;
168
169 // If Xqcilsm is available, first try to form a multi-instruction group (>2).
170 if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
171 if (tryConvertToXqcilsmMultiLdSt(MBBI))
172 return true;
173 }
174
175 // Look ahead for a pairable instruction.
176 MachineBasicBlock::iterator E = MI.getParent()->end();
177 bool MergeForward;
178 MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
179 if (Paired != E) {
180 MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
181 return true;
182 }
183 return false;
184}
185
186static bool isMemOpAligned(MachineInstr &MI, Align RequiredAlignment) {
187 const MachineMemOperand *MMO = *MI.memoperands_begin();
188 Align MMOAlign = MMO->getAlign();
189 return MMOAlign >= RequiredAlignment;
190}
191
192// Convert set of 3 or more LW/SW instructions to QC_LWMI/QC_SWMI/QC_SETWMI.
193// For now this only handles consecutive loads and stores traversing the basic
194// block top-down.
195// TODO: Traverse the basic block bottom-up as well.
//
// Starting at FirstIt, collects a run of same-opcode LW or SW instructions
// that use the same base register with offsets increasing by 4, and replaces
// a run of 3 to 31 of them with one multi-word instruction.
// Returns true if the replacement was made; FirstIt then points at the next
// non-debug instruction after the new one. Returns false without touching the
// block otherwise (all mutation happens after the last early return).
// NOTE(review): original lines 197 (the parameter list, presumably declaring
// FirstIt) and 242 (presumably the declaration of It) are missing from this
// listing.
196bool RISCVLoadStoreOpt::tryConvertToXqcilsmMultiLdSt(
198 MachineInstr &FirstMI = *FirstIt;
199 MachineFunction *MF = FirstMI.getMF();
200
201 if (STI->is64Bit() || !STI->hasVendorXqcilsm())
202 return false;
203
204 unsigned Opc = FirstMI.getOpcode();
205 if (Opc != RISCV::LW && Opc != RISCV::SW)
206 return false;
207
// isMemOpAligned dereferences the first memory operand, so make sure there is
// exactly one before calling it.
208 if (!FirstMI.hasOneMemOperand())
209 return false;
210
211 if (!isMemOpAligned(FirstMI, Align(4)))
212 return false;
213
214 // Require simple reg+imm addressing.
215 const MachineOperand &BaseOp = FirstMI.getOperand(1);
216 const MachineOperand &OffOp = FirstMI.getOperand(2);
217 if (!BaseOp.isReg() || !OffOp.isImm())
218 return false;
219
220 Register Base = BaseOp.getReg();
221 int64_t BaseOff = OffOp.getImm();
222
// The starting offset must satisfy isShiftedUInt<5, 2>; presumably this is the
// immediate range of the multi-word instructions — confirm against the ISA
// definition.
223 if (!isShiftedUInt<5, 2>(BaseOff))
224 return false;
225
226 Register StartReg = FirstMI.getOperand(0).getReg();
227 bool IsLoad = (Opc == RISCV::LW);
228
229 // Load rd cannot be x0 and must not clobber the base register.
230 if (IsLoad) {
231 if (StartReg == RISCV::X0)
232 return false;
233 if (StartReg == Base)
234 return false;
235 }
236
237 // Collect a set of consecutive matching instructions.
238 SmallVector<MachineInstr *, 8> Group;
239 Group.push_back(&FirstMI);
240
241 MachineBasicBlock::iterator E = FirstIt->getParent()->end();
243 int64_t ExpectedOff = BaseOff + 4;
244 unsigned Index = 1;
// Setwmi: every store in the group uses the same source register.
// Swmi: the source registers are consecutive, starting at StartReg.
245 enum class StoreMode { Unknown, Setwmi, Swmi };
246 StoreMode SMode = StoreMode::Unknown;
247
// Extend the group while each following instruction matches; the first
// mismatch of any kind ends the run.
248 while (It != E) {
249 MachineInstr &MI = *It;
250
251 if (!TII->isPairableLdStInstOpc(MI.getOpcode()))
252 break;
253 if (MI.getOpcode() != Opc)
254 break;
255 if (!TII->isLdStSafeToPair(MI, TRI))
256 break;
257 if (!MI.hasOneMemOperand())
258 break;
259 if (!isMemOpAligned(MI, Align(4)))
260 break;
261
262 const MachineOperand &BaseMIOp = MI.getOperand(1);
263 const MachineOperand &OffsetMIOp = MI.getOperand(2);
264 if (!BaseMIOp.isReg() || !OffsetMIOp.isImm())
265 break;
266 if (BaseMIOp.getReg() != Base)
267 break;
268 int64_t Off = OffsetMIOp.getImm();
269 if (Off != ExpectedOff)
270 break;
271
272 Register Reg = MI.getOperand(0).getReg();
273 if (IsLoad) {
274 // For loads, require consecutive destination registers.
275 if (Reg != StartReg + Index)
276 break;
277 if (Reg == Base)
278 break;
279 } else {
280 // For stores, decide mode based on the second instruction and then
281 // enforce the same for the rest.
282 if (SMode == StoreMode::Unknown) {
283 if (Reg == StartReg)
284 SMode = StoreMode::Setwmi;
285 else if (Reg == StartReg + 1)
286 SMode = StoreMode::Swmi;
287 else
288 break;
289 } else if (SMode == StoreMode::Setwmi) {
290 if (Reg != StartReg)
291 break;
292 } else {
293 if (Reg != StartReg + Index)
294 break;
295 }
296 }
297
298 // Passed checks, extend the group.
299 Group.push_back(&MI);
300 ++Index;
301 ExpectedOff += 4;
302 It = next_nodbg(It, E);
303 }
304
305 // We only handle more than 2 here. Pairs are handled in
306 // tryConvertToXqcilsmLdStPair.
// NOTE(review): a run longer than 31 is rejected outright rather than being
// truncated to 31 — confirm this is intended.
307 unsigned Len = Group.size();
308 if (Len < 3 || Len > 31)
309 return false;
310
311 unsigned NewOpc;
312 unsigned StartRegState;
313 bool AddImplicitRegs = true;
314
315 if (IsLoad) {
316 NewOpc = RISCV::QC_LWMI;
317 StartRegState = static_cast<unsigned>(RegState::Define);
318 } else {
319 assert(SMode != StoreMode::Unknown &&
320 "Group should be large enough to know the store mode");
321 if (SMode == StoreMode::Setwmi) {
322 NewOpc = RISCV::QC_SETWMI;
323 // Kill if any of the individual stores killed the reg.
324 bool StartKill = false;
325 for (MachineInstr *MI : Group)
326 StartKill |= MI->getOperand(0).isKill();
327 StartRegState = getKillRegState(StartKill);
// Only one source register is involved, so no implicit operands are added.
328 AddImplicitRegs = false;
329 } else {
330 // SWMI requires consecutive source regs and rd != x0.
331 if (StartReg == RISCV::X0)
332 return false;
333 NewOpc = RISCV::QC_SWMI;
334 StartRegState = getKillRegState(Group.front()->getOperand(0).isKill());
335 }
336 }
337
338 // Aggregate kill on base.
339 bool BaseKill = false;
340 for (MachineInstr *MI : Group)
341 BaseKill |= MI->getOperand(1).isKill();
342
343 // Build the new instruction.
// Use the first instruction's debug location, falling back to the last one's.
344 DebugLoc DL = FirstMI.getDebugLoc();
345 if (!DL)
346 DL = Group.back()->getDebugLoc();
// Operand order: start register, base register, length, starting offset.
347 MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(NewOpc));
348 MIB.addReg(StartReg, StartRegState)
349 .addReg(Base, getKillRegState(BaseKill))
350 .addImm(Len)
351 .addImm(BaseOff);
352
353 // Merge memory references.
354 MIB.cloneMergedMemRefs(Group);
355
356 if (AddImplicitRegs) {
357 // Add implicit operands for the additional registers.
358 for (unsigned i = 1; i < Len; ++i) {
359 Register R = StartReg + i;
360 unsigned State = 0;
361 if (IsLoad)
362 State = static_cast<unsigned>(RegState::ImplicitDefine);
363 else
364 State = RegState::Implicit |
365 getKillRegState(Group[i]->getOperand(0).isKill());
366 MIB.addReg(R, State);
367 }
368 }
369
370 // Insert before the first instruction and remove all in the group.
371 MachineBasicBlock *MBB = FirstIt->getParent();
372 MachineBasicBlock::iterator NewIt = MBB->insert(FirstIt, MIB);
// NOTE(review): removeFromParent is used here while the Zilsd path uses
// MBB.erase — confirm the unlinked instructions are reclaimed elsewhere.
373 for (MachineInstr *MI : Group)
374 MI->removeFromParent();
375
376 // Advance the cursor to the next non-debug instruction after the group.
377 FirstIt = next_nodbg(NewIt, MBB->end());
378 return true;
379}
380
// Tries to replace the LW/LW or SW/SW pair First/Second (same base register,
// 4-byte aligned, offsets exactly 4 apart) with a single QC_LWMI, QC_SWMI or
// QC_SETWMI of length 2.
// Returns true if the new instruction was inserted before First and both
// originals were unlinked; returns false if any precondition fails.
// NOTE(review): original line 383 (the end of the parameter list, which
// presumably declares Second) is missing from this listing.
381bool RISCVLoadStoreOpt::tryConvertToXqcilsmLdStPair(
382 MachineFunction *MF, MachineBasicBlock::iterator First,
384 unsigned Opc = First->getOpcode();
385 if ((Opc != RISCV::LW && Opc != RISCV::SW) || Second->getOpcode() != Opc)
386 return false;
387
388 const auto &FirstOp1 = First->getOperand(1);
389 const auto &SecondOp1 = Second->getOperand(1);
390 const auto &FirstOp2 = First->getOperand(2);
391 const auto &SecondOp2 = Second->getOperand(2);
392
393 // Require simple reg+imm addressing for both.
394 if (!FirstOp1.isReg() || !SecondOp1.isReg() || !FirstOp2.isImm() ||
395 !SecondOp2.isImm())
396 return false;
397
398 Register Base1 = FirstOp1.getReg();
399 Register Base2 = SecondOp1.getReg();
400
401 if (Base1 != Base2)
402 return false;
403
// isMemOpAligned dereferences the first memory operand, so check that each
// instruction has exactly one before calling it.
404 if (!First->hasOneMemOperand() || !Second->hasOneMemOperand())
405 return false;
406
407 if (!isMemOpAligned(*First, Align(4)) || !isMemOpAligned(*Second, Align(4)))
408 return false;
409
410 auto &FirstOp0 = First->getOperand(0);
411 auto &SecondOp0 = Second->getOperand(0);
412
413 int64_t Off1 = FirstOp2.getImm();
414 int64_t Off2 = SecondOp2.getImm();
415
// Normalize so that FirstOp0/Off1 describe the lower-addressed access.
// NOTE(review): FirstOp0 and SecondOp0 are references, so std::swap exchanges
// the operand objects held by the two instructions, not just local aliases,
// and it does so before the checks below that can still return false —
// confirm this is intended.
416 if (Off2 < Off1) {
417 std::swap(FirstOp0, SecondOp0);
418 std::swap(Off1, Off2);
419 }
420
421 if (!isShiftedUInt<5, 2>(Off1) || (Off2 - Off1 != 4))
422 return false;
423
424 Register StartReg = FirstOp0.getReg();
425 Register NextReg = SecondOp0.getReg();
426
427 unsigned XqciOpc;
428 unsigned StartRegState;
429 unsigned NextRegState = 0;
430 bool AddNextReg = true;
431
432 if (Opc == RISCV::LW) {
433
434 if (StartReg == RISCV::X0)
435 return false;
436
437 // If the base reg gets overwritten by one of the loads bail out.
438 if (StartReg == Base1 || NextReg == Base1)
439 return false;
440
441 // The registers need to be consecutive.
442 if (NextReg != StartReg + 1)
443 return false;
444
445 XqciOpc = RISCV::QC_LWMI;
446 StartRegState = static_cast<unsigned>(RegState::Define);
447 NextRegState = static_cast<unsigned>(RegState::ImplicitDefine);
448 } else {
449 assert(Opc == RISCV::SW && "Expected a SW instruction");
// Same source register for both stores -> QC_SETWMI (no second register
// operand); consecutive source registers with a non-x0 start -> QC_SWMI.
450 if (StartReg == NextReg) {
451 XqciOpc = RISCV::QC_SETWMI;
452 StartRegState = getKillRegState(FirstOp0.isKill() || SecondOp0.isKill());
453 AddNextReg = false;
454 } else if (NextReg == StartReg + 1 && StartReg != RISCV::X0) {
455 XqciOpc = RISCV::QC_SWMI;
456 StartRegState = getKillRegState(FirstOp0.isKill());
457 NextRegState = RegState::Implicit | getKillRegState(SecondOp0.isKill());
458 } else {
459 return false;
460 }
461 }
462
// Use First's debug location when it has one, otherwise Second's.
463 DebugLoc DL =
464 First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc();
// Operand order: start register, base register, length (2), lower offset.
// The base is marked killed if either original access killed it.
465 MachineInstrBuilder MIB = BuildMI(*MF, DL, TII->get(XqciOpc));
466 MIB.addReg(StartReg, StartRegState)
467 .addReg(Base1, getKillRegState(FirstOp1.isKill() || SecondOp1.isKill()))
468 .addImm(2)
469 .addImm(Off1)
470 .cloneMergedMemRefs({&*First, &*Second});
471
472 if (AddNextReg)
473 MIB.addReg(NextReg, NextRegState);
474
// Insert the merged instruction before First, then unlink both originals.
475 First->getParent()->insert(First, MIB);
476 First->removeFromParent();
477 Second->removeFromParent();
478
479 return true;
480}
481
// Tries to replace First/Second with one MIPS paired load/store, with the
// paired opcode chosen from First's opcode. Only First's memory operand
// alignment and First's offset are checked here.
// Returns true if the paired instruction was inserted before First and both
// originals were unlinked; returns false if First lacks a single memory
// operand, is under-aligned, or its offset fails isUInt<7>.
// NOTE(review): original line 484 (the end of the parameter list, which
// presumably declares Second) is missing from this listing.
482bool RISCVLoadStoreOpt::tryConvertToMIPSLdStPair(
483 MachineFunction *MF, MachineBasicBlock::iterator First,
485 // Try converting to SWP/LWP/LDP/SDP.
486 // SWP/LWP requires 8-byte alignment whereas LDP/SDP needs 16-byte alignment.
487 unsigned PairOpc;
488 Align RequiredAlignment;
// Any opcode other than SW/LW/SD/LD is treated as unreachable.
489 switch (First->getOpcode()) {
490 default:
491 llvm_unreachable("Unsupported load/store instruction for pairing");
492 case RISCV::SW:
493 PairOpc = RISCV::MIPS_SWP;
494 RequiredAlignment = Align(8);
495 break;
496 case RISCV::LW:
497 PairOpc = RISCV::MIPS_LWP;
498 RequiredAlignment = Align(8);
499 break;
500 case RISCV::SD:
501 PairOpc = RISCV::MIPS_SDP;
502 RequiredAlignment = Align(16);
503 break;
504 case RISCV::LD:
505 PairOpc = RISCV::MIPS_LDP;
506 RequiredAlignment = Align(16);
507 break;
508 }
509
// isMemOpAligned dereferences the first memory operand, so check there is
// exactly one before calling it.
510 if (!First->hasOneMemOperand())
511 return false;
512
513 if (!isMemOpAligned(*First, RequiredAlignment))
514 return false;
515
516 int64_t Offset = First->getOperand(2).getImm();
517 if (!isUInt<7>(Offset))
518 return false;
519
// Operand order: First's register, Second's register, then First's base and
// offset. The debug location is First's when present, otherwise Second's.
520 MachineInstrBuilder MIB = BuildMI(
521 *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(),
522 TII->get(PairOpc));
523 MIB.add(First->getOperand(0))
524 .add(Second->getOperand(0))
525 .add(First->getOperand(1))
526 .add(First->getOperand(2))
527 .cloneMergedMemRefs({&*First, &*Second});
528
529 First->getParent()->insert(First, MIB);
530
531 First->removeFromParent();
532 Second->removeFromParent();
533
534 return true;
535}
536
537// Merge two adjacent load/store instructions into a paired instruction.
538// This function calls the vendor specific implementation that selects the
539// appropriate paired opcode, verifies that the memory operand is properly
540// aligned, and checks that the offset is valid. If all conditions are met, it
541// builds and inserts the paired instruction.
// Dispatcher: forwards First/Second to the Xqcilsm pair conversion on
// non-64-bit subtargets with Xqcilsm, and to the MIPS pair conversion
// otherwise. Returns whatever the selected helper returns.
// NOTE(review): original line 543 (the parameter list) is missing from this
// listing.
542bool RISCVLoadStoreOpt::tryConvertToLdStPair(
544 MachineFunction *MF = First->getMF();
545
546 // Try converting to QC_LWMI/QC_SWMI if the XQCILSM extension is enabled.
547 if (!STI->is64Bit() && STI->hasVendorXqcilsm())
548 return tryConvertToXqcilsmLdStPair(MF, First, Second);
549
550 // Else try to convert them into MIPS Paired Loads/Stores.
551 return tryConvertToMIPSLdStPair(MF, First, Second);
552}
553
554static bool mayAlias(MachineInstr &MIa,
556 AliasAnalysis *AA) {
557 for (MachineInstr *MIb : MemInsns)
558 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
559 return true;
560
561 return false;
562}
563
564// Scan the instructions looking for a load/store that can be combined with the
565// current instruction into a wider equivalent or a load/store pair.
566// TODO: Extend pairing logic to consider reordering both instructions
567// to a safe "middle" position rather than only merging forward/backward.
568// This requires more sophisticated checks for aliasing, register
569// liveness, and potential scheduling hazards.
//
// I            - the first load/store of the prospective pair.
// MergeForward - out-parameter, always reset to false on entry. On a match,
//                false means the second instruction can be combined into the
//                first, true means the first can be combined into the second.
// Returns the iterator of the matching instruction, or the block's end() when
// no match is found, the scan limit is reached, a call is encountered, or the
// base register is modified in between.
// NOTE(review): original lines 570 (the return type) and 574 (presumably the
// declaration of MBBI) are missing from this listing.
571RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
572 bool &MergeForward) {
573 MachineBasicBlock::iterator E = I->getParent()->end();
575 MachineInstr &FirstMI = *I;
576 MBBI = next_nodbg(MBBI, E);
577
578 bool MayLoad = FirstMI.mayLoad();
579 Register Reg = FirstMI.getOperand(0).getReg();
580 Register BaseReg = FirstMI.getOperand(1).getReg();
581 int64_t Offset = FirstMI.getOperand(2).getImm();
// The access size of the first memory operand is the distance a partner's
// offset must be from Offset, in either direction.
582 int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
583
584 MergeForward = false;
585
586 // Track which register units have been modified and used between the first
587 // insn (inclusive) and the second insn.
588 ModifiedRegUnits.clear();
589 UsedRegUnits.clear();
590
591 // Remember any instructions that read/write memory between FirstMI and MI.
592 SmallVector<MachineInstr *, 4> MemInsns;
593
594 for (unsigned Count = 0; MBBI != E && Count < LdStLimit;
595 MBBI = next_nodbg(MBBI, E)) {
596 MachineInstr &MI = *MBBI;
597
598 // Don't count transient instructions towards the search limit since there
599 // may be different numbers of them if e.g. debug information is present.
600 if (!MI.isTransient())
601 ++Count;
602
603 if (MI.getOpcode() == FirstMI.getOpcode() &&
604 TII->isLdStSafeToPair(MI, TRI)) {
605 Register MIBaseReg = MI.getOperand(1).getReg();
606 int64_t MIOffset = MI.getOperand(2).getImm();
607
608 if (BaseReg == MIBaseReg) {
// Not adjacent in either direction: record MI as an intervening memory
// instruction and keep scanning.
609 if ((Offset != MIOffset + OffsetStride) &&
610 (Offset + OffsetStride != MIOffset)) {
611 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
612 TRI);
613 MemInsns.push_back(&MI);
614 continue;
615 }
616
617 // If the destination register of one load is the same register or a
618 // sub/super register of the other load, bail and keep looking.
619 if (MayLoad &&
620 TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
621 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
622 TRI);
623 MemInsns.push_back(&MI);
624 continue;
625 }
626
627 // If the BaseReg has been modified, then we cannot do the optimization.
628 if (!ModifiedRegUnits.available(BaseReg))
629 return E;
630
631 // If the Rt of the second instruction was not modified or used between
632 // the two instructions and none of the instructions between the second
633 // and first alias with the second, we can combine the second into the
634 // first.
635 if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
636 !(MI.mayLoad() &&
637 !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
638 !mayAlias(MI, MemInsns, AA)) {
639
640 MergeForward = false;
641 return MBBI;
642 }
643
644 // Likewise, if the Rt of the first instruction is not modified or used
645 // between the two instructions and none of the instructions between the
646 // first and the second alias with the first, we can combine the first
647 // into the second.
648 if (!(MayLoad &&
649 !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
650 !mayAlias(FirstMI, MemInsns, AA)) {
651
652 if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
653 MergeForward = true;
654 return MBBI;
655 }
656 }
657 // Unable to combine these instructions due to interference in between.
658 // Keep looking.
659 }
660 }
661
// Reached for non-matching instructions and for adjacent candidates that
// could not be combined in either direction.
662 // If the instruction wasn't a matching load or store. Stop searching if we
663 // encounter a call instruction that might modify memory.
664 if (MI.isCall())
665 return E;
666
667 // Update modified / uses register units.
668 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
669
670 // Otherwise, if the base register is modified, we have no match, so
671 // return early.
672 if (!ModifiedRegUnits.available(BaseReg))
673 return E;
674
675 // Update list of instructions that read/write memory.
676 if (MI.mayLoadOrStore())
677 MemInsns.push_back(&MI);
678 }
679 return E;
680}
681
// Moves one of the two instructions next to the other so that they are
// adjacent and in ascending offset order, fixes up kill flags that the move
// may have invalidated, and then attempts the actual conversion through
// tryConvertToLdStPair.
//
// I            - the first instruction of the pair.
// Paired       - the partner returned by findMatchingInsn.
// MergeForward - true to move I down to Paired, false to move Paired up to I.
// Returns NextI, the iterator from which the caller resumes scanning.
// NOTE(review): original lines 682 (the return type), 684 (presumably the
// Paired parameter), 687 (presumably the declaration of NextI) and 734
// (presumably the declarations of First and Second) are missing from this
// listing.
683RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
685 bool MergeForward) {
686 MachineBasicBlock::iterator E = I->getParent()->end();
688 // If NextI is the second of the two instructions to be merged, skip one
689 // further for now. For the MIPS load/store, the merge will invalidate the
690 // iterator, and we don't need to scan the new instruction, as it's a pairwise
691 // instruction, which we're not considering for further action anyway. For the
692 // Xqcilsm load/store, we may not want to do this as the second instruction
693 // could possibly be the first in another pair if we do not merge here. This
694 // is handled in the else block after the call to tryConvertToLdStPair below.
695 if (NextI == Paired)
696 NextI = next_nodbg(NextI, E);
697
698 // Insert our new paired instruction after whichever of the paired
699 // instructions MergeForward indicates.
700 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
701 MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
702 int Offset = I->getOperand(2).getImm();
703 int PairedOffset = Paired->getOperand(2).getImm();
// The moved instruction goes after the stationary one exactly when it has the
// higher offset, so the resulting pair is in ascending offset order.
704 bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
705
// Paired is about to move up next to I, so a kill of the base register on it
// is dropped.
706 if (!MergeForward)
707 Paired->getOperand(1).setIsKill(false);
708
709 // Kill flags may become invalid when moving stores for pairing.
710 if (I->getOperand(0).isUse()) {
711 if (!MergeForward) {
712 // Check if the Paired store's source register has a kill flag and clear
713 // it only if there are intermediate uses between I and Paired.
714 MachineOperand &PairedRegOp = Paired->getOperand(0);
715 if (PairedRegOp.isKill()) {
716 for (auto It = std::next(I); It != Paired; ++It) {
717 if (It->readsRegister(PairedRegOp.getReg(), TRI)) {
718 PairedRegOp.setIsKill(false);
719 break;
720 }
721 }
722 }
723 } else {
724 // Clear kill flags of the first store's register in the forward
725 // direction.
726 Register Reg = I->getOperand(0).getReg();
727 for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
728 MI.clearRegisterKills(Reg, TRI);
729 }
730 }
731
// Unlink the instruction that moves; it is re-inserted next to
// InsertionPoint just below.
732 MachineInstr *ToInsert = DeletionPoint->removeFromParent();
733 MachineBasicBlock &MBB = *InsertionPoint->getParent();
735
736 if (!InsertAfter) {
737 First = MBB.insert(InsertionPoint, ToInsert);
738 Second = InsertionPoint;
739 } else {
740 Second = MBB.insertAfter(InsertionPoint, ToInsert);
741 First = InsertionPoint;
742 }
743
// If the conversion fails, the two instructions are left adjacent in their
// new order.
744 if (tryConvertToLdStPair(First, Second)) {
745 LLVM_DEBUG(dbgs() << "Pairing load/store:\n ");
746 LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
747 } else if (!STI->is64Bit() && STI->hasVendorXqcilsm()) {
748 // We were unable to form the pair, so use the next non-debug instruction
749 // after the first instruction we had wanted to merge.
750 NextI = next_nodbg(I, E);
751 }
752
753 return NextI;
754}
755
756//===----------------------------------------------------------------------===//
757// Post reg-alloc zilsd pass implementation
758//===----------------------------------------------------------------------===//
759
760bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
761 // Special case: First register can not be zero unless both registers are
762 // zeros.
763 // Spec says: LD instructions with destination x0 are processed as any other
764 // load, but the result is discarded entirely and x1 is not written. If using
765 // x0 as src of SD, the entire 64-bit operand is zero — i.e., register x1 is
766 // not accessed.
767 if (First == RISCV::X0)
768 return Second == RISCV::X0;
769
770 // Check if registers form a valid even/odd pair for Zilsd
771 unsigned FirstNum = TRI->getEncodingValue(First);
772 unsigned SecondNum = TRI->getEncodingValue(Second);
773
774 // Must be consecutive and first must be even
775 return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
776}
777
// Replaces the Zilsd pseudo at MBBI with two LW (IsLoad) or two SW
// instructions inserted before it: one for operand 0 at the original offset
// and one for operand 1 at the original offset + 4. The pseudo is then erased
// and MBBI is set to the iterator returned by MBB.erase.
// Pseudo operand layout as read here: 0 = first register, 1 = second
// register, 2 = base register, 3 = offset (immediate or symbolic).
// NOTE(review): original lines 779 (presumably the MBBI parameter) and 817,
// 821, 827, 832 (the second argument of the load-side addReg calls) are
// missing from this listing.
778void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
780 bool IsLoad) {
781 MachineInstr *MI = &*MBBI;
782 DebugLoc DL = MI->getDebugLoc();
783
784 const MachineOperand &FirstOp = MI->getOperand(0);
785 const MachineOperand &SecondOp = MI->getOperand(1);
786 const MachineOperand &BaseOp = MI->getOperand(2);
787 Register FirstReg = FirstOp.getReg();
788 Register SecondReg = SecondOp.getReg();
789 Register BaseReg = BaseOp.getReg();
790
791 // Handle both immediate and symbolic operands for offset
792 const MachineOperand &OffsetOp = MI->getOperand(3);
793 int BaseOffset;
794 if (OffsetOp.isImm())
795 BaseOffset = OffsetOp.getImm();
796 else
797 // For symbolic operands, extract the embedded offset
798 BaseOffset = OffsetOp.getOffset();
799
800 unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
801 MachineInstrBuilder MIB1, MIB2;
802
// In every branch below, only the later of the two new instructions carries
// the base register's kill flag.
803 // Create two separate instructions
804 if (IsLoad) {
805 // It's possible that first register is same as base register, when we split
806 // it becomes incorrect because base register is overwritten, e.g.
807 // X10, X13 = PseudoLD_RV32_OPT killed X10, 0
808 // =>
809 // X10 = LW X10, 0
810 // X13 = LW killed X10, 4
811 // we can just switch the order to resolve that:
812 // X13 = LW X10, 4
813 // X10 = LW killed X10, 0
814 if (FirstReg == BaseReg) {
815 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
816 .addReg(SecondReg,
818 .addReg(BaseReg);
819 MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
820 .addReg(FirstReg,
822 .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
823
824 } else {
825 MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
826 .addReg(FirstReg,
828 .addReg(BaseReg);
829
830 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
831 .addReg(SecondReg,
833 .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
834 }
835
836 ++NumLD2LW;
837 LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
838 } else {
839 assert(
840 FirstReg != SecondReg &&
841 "First register and second register is impossible to be same register");
842 MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
843 .addReg(FirstReg, getKillRegState(FirstOp.isKill()))
844 .addReg(BaseReg);
845
846 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
847 .addReg(SecondReg, getKillRegState(SecondOp.isKill()))
848 .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
849
850 ++NumSD2SW;
851 LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
852 }
853
// MIB1 reuses the original offset operand unchanged; MIB2 gets the same kind
// of operand with the offset advanced by 4.
// NOTE(review): if OffsetOp is none of immediate / global / constant-pool /
// block-address, MIB2 receives no offset operand at all — confirm no other
// operand kind can reach here.
854 // Add offset operands - preserve symbolic references
855 MIB1.add(OffsetOp);
856 if (OffsetOp.isImm())
857 MIB2.addImm(BaseOffset + 4);
858 else if (OffsetOp.isGlobal())
859 MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
860 OffsetOp.getTargetFlags());
861 else if (OffsetOp.isCPI())
862 MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
863 OffsetOp.getTargetFlags());
864 else if (OffsetOp.isBlockAddress())
865 MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
866 OffsetOp.getTargetFlags());
867
868 // Copy memory operands if the original instruction had them
869 // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
870 // not 8.
871 MIB1.cloneMemRefs(*MI);
872 MIB2.cloneMemRefs(*MI);
873
874 // Remove the original paired instruction and update iterator
875 MBBI = MBB.erase(MBBI);
876}
877
// Lowers the Zilsd pseudo at MBBI, if it is one.
// Returns false, leaving MBBI untouched, when the instruction is neither
// PseudoLD_RV32_OPT nor PseudoSD_RV32_OPT. Otherwise returns true after one of:
//   - splitting into two 32-bit accesses when the two registers do not form a
//     valid Zilsd pair, or
//   - emitting the real LD_RV32/SD_RV32 on the matching GPR pair register and
//     erasing the pseudo.
// In both cases MBBI is updated to the instruction following the erased
// pseudo.
// NOTE(review): original line 879 (presumably the MBBI parameter) is missing
// from this listing.
878bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
880 MachineInstr *MI = &*MBBI;
881 unsigned Opcode = MI->getOpcode();
882
883 // Check if this is a Zilsd pseudo that needs fixing
884 if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
885 return false;
886
887 bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
888
889 const MachineOperand &FirstOp = MI->getOperand(0);
890 const MachineOperand &SecondOp = MI->getOperand(1);
891 Register FirstReg = FirstOp.getReg();
892 Register SecondReg = SecondOp.getReg();
893
894 if (!isValidZilsdRegPair(FirstReg, SecondReg)) {
895 // Need to split back into two instructions
896 splitLdSdIntoTwo(MBB, MBBI, IsLoad);
897 return true;
898 }
899
900 // Registers are valid, convert to real LD/SD instruction
901 const MachineOperand &BaseOp = MI->getOperand(2);
902 Register BaseReg = BaseOp.getReg();
903 DebugLoc DL = MI->getDebugLoc();
904 // Handle both immediate and symbolic operands for offset
905 const MachineOperand &OffsetOp = MI->getOperand(3);
906
907 unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
908
909 // Create register pair from the two individual registers
910 unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
911 &RISCV::GPRPairRegClass);
912 // Create the real LD/SD instruction with register pair
913 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));
914
// The pair is marked dead (load) or killed (store) only when both halves
// carried that flag on the pseudo.
915 if (IsLoad) {
916 // For LD, the register pair is the destination
917 MIB.addReg(RegPair, RegState::Define | getDeadRegState(FirstOp.isDead() &&
918 SecondOp.isDead()));
919 } else {
920 // For SD, the register pair is the source
921 MIB.addReg(RegPair, getKillRegState(FirstOp.isKill() && SecondOp.isKill()));
922 }
923
924 MIB.addReg(BaseReg, getKillRegState(BaseOp.isKill()))
925 .add(OffsetOp)
926 .cloneMemRefs(*MI);
927
928 LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
929 << "\n");
930
931 // Remove the pseudo instruction and update iterator
932 MBBI = MBB.erase(MBBI);
933
934 return true;
935}
936
937// Returns an instance of the Load / Store Optimization pass.
// The returned object is a freshly heap-allocated RISCVLoadStoreOpt.
// NOTE(review): original line 938 (the function signature) is missing from
// this listing.
939 return new RISCVLoadStoreOpt();
940}
unsigned const MachineRegisterInfo * MRI
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define RISCV_LOAD_STORE_OPT_NAME
static cl::opt< unsigned > LdStLimit("riscv-load-store-scan-limit", cl::init(128), cl::Hidden)
static bool isMemOpAligned(MachineInstr &MI, Align RequiredAlignment)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void clear()
Clears the set.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
FunctionPass * createRISCVLoadStoreOptPass()
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getDeadRegState(bool B)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
unsigned getKillRegState(bool B)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39