LLVM 17.0.0git
RISCVMergeBaseOffset.cpp
Go to the documentation of this file.
1//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Merge the offset of address calculation into the offset field
10// of instructions in a global address lowering sequence.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCV.h"
15#include "RISCVTargetMachine.h"
17#include "llvm/CodeGen/Passes.h"
19#include "llvm/Support/Debug.h"
21#include <optional>
22#include <set>
23using namespace llvm;
24
25#define DEBUG_TYPE "riscv-merge-base-offset"
26#define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
27namespace {
28
29class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
30 const RISCVSubtarget *ST = nullptr;
32
33public:
34 static char ID;
35 bool runOnMachineFunction(MachineFunction &Fn) override;
36 bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);
37
38 bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
39 void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
40 int64_t Offset);
41 bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
42 MachineInstr &TailAdd, Register GSReg);
43 bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
44 MachineInstr &TailShXAdd, Register GSReg);
45
46 bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
47
48 RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
49
52 MachineFunctionProperties::Property::IsSSA);
53 }
54
55 void getAnalysisUsage(AnalysisUsage &AU) const override {
56 AU.setPreservesCFG();
58 }
59
60 StringRef getPassName() const override {
62 }
63};
64} // end anonymous namespace
65
// Definition of the pass's static ID member declared in the class above.
66char RISCVMergeBaseOffsetOpt::ID = 0;
// Register the pass: DEBUG_TYPE is supplied as the macro's `arg`,
// RISCV_MERGE_BASE_OFFSET_NAME as its `name`, and both the `cfg` and
// `analysis` parameters are false.
67INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
68 RISCV_MERGE_BASE_OFFSET_NAME, false, false)
69
70// Detect either of the patterns:
71//
72// 1. (medlow pattern):
73// lui vreg1, %hi(s)
74// addi vreg2, vreg1, %lo(s)
75//
76// 2. (medany pattern):
77// .Lpcrel_hi1:
78// auipc vreg1, %pcrel_hi(s)
79// addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
80//
81// The pattern is only accepted if:
82// 1) The first instruction has only one use, which is the ADDI.
83// 2) The address operands have the appropriate type, reflecting the
84// lowering of a global address or constant pool using medlow or medany.
85// 3) The offset value in the Global Address or Constant Pool is 0.
86bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
87 MachineInstr *&Lo) {
88 if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
89 return false;
90
91 const MachineOperand &HiOp1 = Hi.getOperand(1);
92 unsigned ExpectedFlags =
93 Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
94 if (HiOp1.getTargetFlags() != ExpectedFlags)
95 return false;
96
97 if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
98 return false;
99
100 Register HiDestReg = Hi.getOperand(0).getReg();
101 if (!MRI->hasOneUse(HiDestReg))
102 return false;
103
104 Lo = &*MRI->use_instr_begin(HiDestReg);
105 if (Lo->getOpcode() != RISCV::ADDI)
106 return false;
107
108 const MachineOperand &LoOp2 = Lo->getOperand(2);
109 if (Hi.getOpcode() == RISCV::LUI) {
110 if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
111 !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
112 return false;
113 } else {
114 assert(Hi.getOpcode() == RISCV::AUIPC);
115 if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
117 return false;
118 }
119
120 if (HiOp1.isGlobal()) {
121 LLVM_DEBUG(dbgs() << " Found lowered global address: "
122 << *HiOp1.getGlobal() << "\n");
123 } else {
124 assert(HiOp1.isCPI());
125 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
126 << "\n");
127 }
128
129 return true;
130}
131
132// Update the offset in Hi and Lo instructions.
133// Delete the tail instruction and update all the uses to use the
134// output from Lo.
135void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
136 MachineInstr &Tail, int64_t Offset) {
137 assert(isInt<32>(Offset) && "Unexpected offset");
138 // Put the offset back in Hi and the Lo
139 Hi.getOperand(1).setOffset(Offset);
140 if (Hi.getOpcode() != RISCV::AUIPC)
141 Lo.getOperand(2).setOffset(Offset);
142 // Delete the tail instruction.
143 MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
144 Tail.eraseFromParent();
145 LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
146 << " " << Hi << " " << Lo;);
147}
148
149// Detect patterns for large offsets that are passed into an ADD instruction.
150// If the pattern is found, updates the offset in Hi and Lo instructions
151// and deletes TailAdd and the instructions that produced the offset.
152//
153// Base address lowering is of the form:
154// Hi: lui vreg1, %hi(s)
155// Lo: addi vreg2, vreg1, %lo(s)
156// / \
157// / \
158// / \
159// / The large offset can be of two forms: \
160// 1) Offset that has non zero bits in lower 2) Offset that has non zero
161// 12 bits and upper 20 bits bits in upper 20 bits only
162// OffseLUI: lui vreg3, 4
163// OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128
164// \ /
165// \ /
166// \ /
167// \ /
168// TailAdd: add vreg4, vreg2, voff
169bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
171 MachineInstr &TailAdd,
172 Register GAReg) {
173 assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
174 Register Rs = TailAdd.getOperand(1).getReg();
175 Register Rt = TailAdd.getOperand(2).getReg();
176 Register Reg = Rs == GAReg ? Rt : Rs;
177
178 // Can't fold if the register has more than one use.
179 if (!MRI->hasOneUse(Reg))
180 return false;
181 // This can point to an ADDI(W) or a LUI:
182 MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
183 if (OffsetTail.getOpcode() == RISCV::ADDI ||
184 OffsetTail.getOpcode() == RISCV::ADDIW) {
185 // The offset value has non zero bits in both %hi and %lo parts.
186 // Detect an ADDI that feeds from a LUI instruction.
187 MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
188 if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
189 return false;
190 int64_t OffLo = AddiImmOp.getImm();
191 MachineInstr &OffsetLui =
192 *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
193 MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
194 if (OffsetLui.getOpcode() != RISCV::LUI ||
195 LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
196 !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
197 return false;
198 int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
199 Offset += OffLo;
200 // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
201 if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
202 Offset = SignExtend64<32>(Offset);
203 // We can only fold simm32 offsets.
204 if (!isInt<32>(Offset))
205 return false;
206 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
207 << " " << OffsetLui);
208 foldOffset(Hi, Lo, TailAdd, Offset);
209 OffsetTail.eraseFromParent();
210 OffsetLui.eraseFromParent();
211 return true;
212 } else if (OffsetTail.getOpcode() == RISCV::LUI) {
213 // The offset value has all zero bits in the lower 12 bits. Only LUI
214 // exists.
215 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
216 int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
217 foldOffset(Hi, Lo, TailAdd, Offset);
218 OffsetTail.eraseFromParent();
219 return true;
220 }
221 return false;
222}
223
224// Detect patterns for offsets that are passed into a SHXADD instruction.
225// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
226// The constant is created with addi voff, x0, C, and shXadd is used to
227// fill insert the trailing zeros and do the addition.
228// If the pattern is found, updates the offset in Hi and Lo instructions
229// and deletes TailShXAdd and the instructions that produced the offset.
230//
231// Hi: lui vreg1, %hi(s)
232// Lo: addi vreg2, vreg1, %lo(s)
233// OffsetTail: addi voff, x0, C
234// TailAdd: shXadd vreg4, voff, vreg2
235bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
237 MachineInstr &TailShXAdd,
238 Register GAReg) {
239 assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
240 TailShXAdd.getOpcode() == RISCV::SH2ADD ||
241 TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
242 "Expected SHXADD instruction!");
243
244 // The first source is the shifted operand.
245 Register Rs1 = TailShXAdd.getOperand(1).getReg();
246
247 if (GAReg != TailShXAdd.getOperand(2).getReg())
248 return false;
249
250 // Can't fold if the register has more than one use.
251 if (!MRI->hasOneUse(Rs1))
252 return false;
253 // This can point to an ADDI X0, C.
254 MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
255 if (OffsetTail.getOpcode() != RISCV::ADDI)
256 return false;
257 if (!OffsetTail.getOperand(1).isReg() ||
258 OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
259 !OffsetTail.getOperand(2).isImm())
260 return false;
261
262 int64_t Offset = OffsetTail.getOperand(2).getImm();
263 assert(isInt<12>(Offset) && "Unexpected offset");
264
265 unsigned ShAmt;
266 switch (TailShXAdd.getOpcode()) {
267 default: llvm_unreachable("Unexpected opcode");
268 case RISCV::SH1ADD: ShAmt = 1; break;
269 case RISCV::SH2ADD: ShAmt = 2; break;
270 case RISCV::SH3ADD: ShAmt = 3; break;
271 }
272
273 Offset = (uint64_t)Offset << ShAmt;
274
275 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
276 foldOffset(Hi, Lo, TailShXAdd, Offset);
277 OffsetTail.eraseFromParent();
278 return true;
279}
280
281bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
282 MachineInstr &Lo) {
283 Register DestReg = Lo.getOperand(0).getReg();
284
285 // Look for arithmetic instructions we can get an offset from.
286 // We might be able to remove the arithmetic instructions by folding the
287 // offset into the LUI+ADDI.
288 if (!MRI->hasOneUse(DestReg))
289 return false;
290
291 // Lo has only one use.
292 MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
293 switch (Tail.getOpcode()) {
294 default:
295 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
296 << Tail);
297 break;
298 case RISCV::ADDI: {
299 // Offset is simply an immediate operand.
300 int64_t Offset = Tail.getOperand(2).getImm();
301
302 // We might have two ADDIs in a row.
303 Register TailDestReg = Tail.getOperand(0).getReg();
304 if (MRI->hasOneUse(TailDestReg)) {
305 MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
306 if (TailTail.getOpcode() == RISCV::ADDI) {
307 Offset += TailTail.getOperand(2).getImm();
308 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
309 foldOffset(Hi, Lo, TailTail, Offset);
310 Tail.eraseFromParent();
311 return true;
312 }
313 }
314
315 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
316 foldOffset(Hi, Lo, Tail, Offset);
317 return true;
318 }
319 case RISCV::ADD:
320 // The offset is too large to fit in the immediate field of ADDI.
321 // This can be in two forms:
322 // 1) LUI hi_Offset followed by:
323 // ADDI lo_offset
324 // This happens in case the offset has non zero bits in
325 // both hi 20 and lo 12 bits.
326 // 2) LUI (offset20)
327 // This happens in case the lower 12 bits of the offset are zeros.
328 return foldLargeOffset(Hi, Lo, Tail, DestReg);
329 case RISCV::SH1ADD:
330 case RISCV::SH2ADD:
331 case RISCV::SH3ADD:
332 // The offset is too large to fit in the immediate field of ADDI.
333 // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
334 // (SH3ADD (ADDI X0, C), DestReg).
335 return foldShiftedOffset(Hi, Lo, Tail, DestReg);
336 }
337
338 return false;
339}
340
341bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
342 MachineInstr &Lo) {
343 Register DestReg = Lo.getOperand(0).getReg();
344
345 // If all the uses are memory ops with the same offset, we can transform:
346 //
347 // 1. (medlow pattern):
348 // Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
349 // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
350 // Tail: lw vreg3, 8(vreg2)
351 //
352 // 2. (medany pattern):
353 // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8)
354 // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
355 // Tail: lw vreg3, 8(vreg2)
356
357 std::optional<int64_t> CommonOffset;
358 for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
359 switch (UseMI.getOpcode()) {
360 default:
361 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
362 return false;
363 case RISCV::LB:
364 case RISCV::LH:
365 case RISCV::LW:
366 case RISCV::LBU:
367 case RISCV::LHU:
368 case RISCV::LWU:
369 case RISCV::LD:
370 case RISCV::FLH:
371 case RISCV::FLW:
372 case RISCV::FLD:
373 case RISCV::SB:
374 case RISCV::SH:
375 case RISCV::SW:
376 case RISCV::SD:
377 case RISCV::FSH:
378 case RISCV::FSW:
379 case RISCV::FSD: {
380 if (UseMI.getOperand(1).isFI())
381 return false;
382 // Register defined by Lo should not be the value register.
383 if (DestReg == UseMI.getOperand(0).getReg())
384 return false;
385 assert(DestReg == UseMI.getOperand(1).getReg() &&
386 "Expected base address use");
387 // All load/store instructions must use the same offset.
388 int64_t Offset = UseMI.getOperand(2).getImm();
389 if (CommonOffset && Offset != CommonOffset)
390 return false;
391 CommonOffset = Offset;
392 }
393 }
394 }
395
396 // We found a common offset.
397 // Update the offsets in global address lowering.
398 // We may have already folded some arithmetic so we need to add to any
399 // existing offset.
400 int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
401 // RV32 ignores the upper 32 bits.
402 if (!ST->is64Bit())
403 NewOffset = SignExtend64<32>(NewOffset);
404 // We can only fold simm32 offsets.
405 if (!isInt<32>(NewOffset))
406 return false;
407
408 Hi.getOperand(1).setOffset(NewOffset);
409 MachineOperand &ImmOp = Lo.getOperand(2);
410 if (Hi.getOpcode() != RISCV::AUIPC)
411 ImmOp.setOffset(NewOffset);
412
413 // Update the immediate in the load/store instructions to add the offset.
414 for (MachineInstr &UseMI :
415 llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
416 UseMI.removeOperand(2);
417 UseMI.addOperand(ImmOp);
418 // Update the base reg in the Tail instruction to feed from LUI.
419 // Output of Hi is only used in Lo, no need to use MRI->replaceRegWith().
420 UseMI.getOperand(1).setReg(Hi.getOperand(0).getReg());
421 }
422
423 Lo.eraseFromParent();
424 return true;
425}
426
427bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
428 if (skipFunction(Fn.getFunction()))
429 return false;
430
432
433 bool MadeChange = false;
434 MRI = &Fn.getRegInfo();
435 for (MachineBasicBlock &MBB : Fn) {
436 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
437 for (MachineInstr &Hi : MBB) {
438 MachineInstr *Lo = nullptr;
439 if (!detectFoldable(Hi, Lo))
440 continue;
441 MadeChange |= detectAndFoldOffset(Hi, *Lo);
442 MadeChange |= foldIntoMemoryOps(Hi, *Lo);
443 }
444 }
445
446 return MadeChange;
447}
448
449/// Returns an instance of the Merge Base Offset Optimization pass.
451 return new RISCVMergeBaseOffsetOpt();
452}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define RISCV_MERGE_BASE_OFFSET_NAME
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
@ MO_MCSymbol
MCSymbol reference (for debug/eh info)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
FunctionPass * createRISCVMergeBaseOffsetOptPass()
Returns an instance of the Merge Base Offset Optimization pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163