LLVM 22.0.0git
RISCVOptWInstrs.cpp
Go to the documentation of this file.
1//===- RISCVOptWInstrs.cpp - MI W instruction optimizations ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass does some optimizations for *W instructions at the MI level.
10//
11// First it removes unneeded sext.w instructions. Either because the sign
12// extended bits aren't consumed or because the input was already sign extended
13// by an earlier instruction.
14//
15// Then:
16// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17// it removes the -w suffix from opw instructions whenever all users are
18// dependent only on the lower word of the result of the instruction.
19// The cases handled are:
20// * addw because c.add has a larger register encoding than c.addw.
21// * addiw because it helps reduce test differences between RV32 and RV64
22// w/o being a pessimization.
23// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
24// * slliw because c.slliw doesn't exist and c.slli does
25//
26// 2. Or if explicitly enabled or the target prefers instructions with W suffix,
27// it adds the W suffix to the instruction whenever all users are dependent
28// only on the lower word of the result of the instruction.
29// The cases handled are:
30// * add/addi/sub/mul.
31// * slli with imm < 32.
32// * ld/lwu.
33//===---------------------------------------------------------------------===//
34
35#include "RISCV.h"
37#include "RISCVSubtarget.h"
38#include "llvm/ADT/SmallSet.h"
39#include "llvm/ADT/Statistic.h"
42
43using namespace llvm;
44
45#define DEBUG_TYPE "riscv-opt-w-instrs"
46#define RISCV_OPT_W_INSTRS_NAME "RISC-V Optimize W Instructions"
47
// Pass statistics. NumRemovedSExtW is bumped once per erased sext.w in
// removeSExtWInstrs; the two "transformed" counters are bumped whenever an
// instruction's opcode is switched to, or away from, its W variant.
48STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
49STATISTIC(NumTransformedToWInstrs,
50 "Number of instructions transformed to W-ops");
51STATISTIC(NumTransformedToNonWInstrs,
52 "Number of instructions transformed to non-W-ops");
53
// Hidden command-line switches, both off by default.
// NOTE(review): the read of DisableSExtWRemoval is not visible in this
// listing; presumably it gates removeSExtWInstrs - confirm.
54static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
55 cl::desc("Disable removal of sext.w"),
56 cl::init(false), cl::Hidden);
// When set, canonicalizeWSuffixes never strips a W suffix.
57static cl::opt<bool> DisableStripWSuffix("riscv-disable-strip-w-suffix",
58 cl::desc("Disable strip W suffix"),
59 cl::init(false), cl::Hidden);
60
61namespace {
62
// Machine-function pass that first removes redundant sext.w instructions and
// then canonicalizes W suffixes; see runOnMachineFunction for the driver.
63class RISCVOptWInstrs : public MachineFunctionPass {
64public:
 // Pass identification; its address is what identifies the pass.
65 static char ID;
66
67 RISCVOptWInstrs() : MachineFunctionPass(ID) {}
68
 // Entry point: runs both phases below and reports whether anything changed.
69 bool runOnMachineFunction(MachineFunction &MF) override;
 // Phase 1: erase sext.w instructions that are provably redundant.
70 bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
71 const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
 // Phase 2: add or strip W suffixes depending on subtarget preference.
72 bool canonicalizeWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
73 const RISCVSubtarget &ST,
74 MachineRegisterInfo &MRI);
75
 // The pass only rewrites/erases instructions, so the CFG is preserved.
 // NOTE(review): listing line 78 is absent from this extract.
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
77 AU.setPreservesCFG();
79 }
80
81 StringRef getPassName() const override { return RISCV_OPT_W_INSTRS_NAME; }
82};
83
84} // end anonymous namespace
85
86char RISCVOptWInstrs::ID = 0;
88 false)
89
91 return new RISCVOptWInstrs();
92}
93
95 unsigned Bits) {
 // Returns true when the instruction owning UserOp is an RVV pseudo for
 // which getVectorLowDemandedScalarBits reports a demanded-bit count that is
 // no larger than Bits. Every other situation yields false.
 // NOTE(review): the first line of the signature is absent from this extract.
96 const MachineInstr &MI = *UserOp.getParent();
97 unsigned MCOpcode = RISCV::getRVVMCOpcode(MI.getOpcode());
98
 // No RVV MC opcode is known for this instruction: give up.
99 if (!MCOpcode)
100 return false;
101
102 const MCInstrDesc &MCID = MI.getDesc();
103 const uint64_t TSFlags = MCID.TSFlags;
 // The demanded-bits query needs the element width, so a SEW operand is
 // required.
104 if (!RISCVII::hasSEWOp(TSFlags))
105 return false;
106 assert(RISCVII::hasVLOp(TSFlags));
107 const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
108
 // A use as the VL operand is never treated as an N-bit use.
109 if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
110 return false;
111
 // An empty optional means the demanded width is unknown, which is treated
 // as "not an N-bit user".
112 auto NumDemandedBits =
113 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
114 return NumDemandedBits && Bits >= *NumDemandedBits;
115}
116
117// Checks if all users only demand the lower \p OrigBits of the original
118// instruction's result.
//
// The use graph is walked with a worklist of (instruction, demanded-bit-count)
// pairs. A user that merely forwards the low bits of its input is pushed back
// onto the worklist so that its own users are checked as well; any user that
// is not recognised below makes the function return false.
119// TODO: handle multiple interdependent transformations
120static bool hasAllNBitUsers(const MachineInstr &OrigMI,
121 const RISCVSubtarget &ST,
122 const MachineRegisterInfo &MRI, unsigned OrigBits) {
123
 // NOTE(review): the declarations of Worklist and Visited are absent from
 // this extract (listing lines 124-125).
126
127 Worklist.emplace_back(&OrigMI, OrigBits);
128
129 while (!Worklist.empty()) {
130 auto P = Worklist.pop_back_val();
131 const MachineInstr *MI = P.first;
132 unsigned Bits = P.second;
133
 // Skip (instruction, bit-count) pairs that were already processed.
134 if (!Visited.insert(P).second)
135 continue;
136
137 // Only handle instructions with one def.
138 if (MI->getNumExplicitDefs() != 1)
139 return false;
140
141 Register DestReg = MI->getOperand(0).getReg();
142 if (!DestReg.isVirtual())
143 return false;
144
145 for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
146 const MachineInstr *UserMI = UserOp.getParent();
147 unsigned OpIdx = UserOp.getOperandNo();
148
149 switch (UserMI->getOpcode()) {
150 default:
 // Unrecognised user: only acceptable if it is a vector pseudo that
 // demands no more than Bits of this operand.
151 if (vectorPseudoHasAllNBitUsers(UserOp, Bits))
152 break;
153 return false;
154
 // Users accepted whenever at least the low 32 bits are intact.
155 case RISCV::ADDIW:
156 case RISCV::ADDW:
157 case RISCV::DIVUW:
158 case RISCV::DIVW:
159 case RISCV::MULW:
160 case RISCV::REMUW:
161 case RISCV::REMW:
162 case RISCV::SLLW:
163 case RISCV::SRAIW:
164 case RISCV::SRAW:
165 case RISCV::SRLIW:
166 case RISCV::SRLW:
167 case RISCV::SUBW:
168 case RISCV::ROLW:
169 case RISCV::RORW:
170 case RISCV::RORIW:
171 case RISCV::CLSW:
172 case RISCV::CLZW:
173 case RISCV::CTZW:
174 case RISCV::CPOPW:
175 case RISCV::SLLI_UW:
176 case RISCV::ABSW:
177 case RISCV::FMV_W_X:
178 case RISCV::FCVT_H_W:
179 case RISCV::FCVT_H_W_INX:
180 case RISCV::FCVT_H_WU:
181 case RISCV::FCVT_H_WU_INX:
182 case RISCV::FCVT_S_W:
183 case RISCV::FCVT_S_W_INX:
184 case RISCV::FCVT_S_WU:
185 case RISCV::FCVT_S_WU_INX:
186 case RISCV::FCVT_D_W:
187 case RISCV::FCVT_D_W_INX:
188 case RISCV::FCVT_D_WU:
189 case RISCV::FCVT_D_WU_INX:
190 if (Bits >= 32)
191 break;
192 return false;
193
 // Users accepted whenever at least the low 8 bits are intact.
194 case RISCV::SEXT_B:
195 case RISCV::PACKH:
196 if (Bits >= 8)
197 break;
198 return false;
 // Users accepted whenever at least the low 16 bits are intact.
199 case RISCV::SEXT_H:
200 case RISCV::FMV_H_X:
201 case RISCV::ZEXT_H_RV32:
202 case RISCV::ZEXT_H_RV64:
203 case RISCV::PACKW:
204 if (Bits >= 16)
205 break;
206 return false;
207
 // Accepted whenever at least the low half of the register is intact.
208 case RISCV::PACK:
209 if (Bits >= (ST.getXLen() / 2))
210 break;
211 return false;
212
213 case RISCV::SRLI: {
214 // If we are shifting right by less than Bits, and users don't demand
215 // any bits that were shifted into [Bits-1:0], then we can consider this
216 // as an N-Bit user.
217 unsigned ShAmt = UserMI->getOperand(2).getImm();
218 if (Bits > ShAmt) {
219 Worklist.emplace_back(UserMI, Bits - ShAmt);
220 break;
221 }
222 return false;
223 }
224
225 // these overwrite higher input bits, otherwise the lower word of output
226 // depends only on the lower word of input. So check their uses read W.
227 case RISCV::SLLI: {
228 unsigned ShAmt = UserMI->getOperand(2).getImm();
 // Every input bit at or above Bits is shifted out of the register.
229 if (Bits >= (ST.getXLen() - ShAmt))
230 break;
 // Otherwise the valid bits end up in the low Bits + ShAmt bits.
231 Worklist.emplace_back(UserMI, Bits + ShAmt);
232 break;
233 }
234 case RISCV::SLLIW: {
235 unsigned ShAmt = UserMI->getOperand(2).getImm();
 // Same reasoning as SLLI, with a 32-bit window instead of XLen.
236 if (Bits >= 32 - ShAmt)
237 break;
238 Worklist.emplace_back(UserMI, Bits + ShAmt);
239 break;
240 }
241
242 case RISCV::ANDI: {
243 uint64_t Imm = UserMI->getOperand(2).getImm();
 // The mask has no set bit at or above Bits, so those input bits are
 // cleared and never observed.
244 if (Bits >= (unsigned)llvm::bit_width(Imm))
245 break;
246 Worklist.emplace_back(UserMI, Bits);
247 break;
248 }
249 case RISCV::ORI: {
250 uint64_t Imm = UserMI->getOperand(2).getImm();
 // Every immediate bit at or above Bits is set, so those result bits are
 // one regardless of the input.
251 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
252 break;
253 Worklist.emplace_back(UserMI, Bits);
254 break;
255 }
256
257 case RISCV::SLL:
258 case RISCV::BSET:
259 case RISCV::BCLR:
260 case RISCV::BINV:
261 // Operand 2 is the shift amount which uses log2(xlen) bits.
262 if (OpIdx == 2) {
263 if (Bits >= Log2_32(ST.getXLen()))
264 break;
265 return false;
266 }
 // Used as the data operand: the users of this instruction decide.
267 Worklist.emplace_back(UserMI, Bits);
268 break;
269
270 case RISCV::SRA:
271 case RISCV::SRL:
272 case RISCV::ROL:
273 case RISCV::ROR:
274 // Operand 2 is the shift amount which uses log2(xlen) bits (6 on RV64).
 // A use as any other operand is rejected.
275 if (OpIdx == 2 && Bits >= Log2_32(ST.getXLen()))
276 break;
277 return false;
278
279 case RISCV::ADD_UW:
280 case RISCV::SH1ADD_UW:
281 case RISCV::SH2ADD_UW:
282 case RISCV::SH3ADD_UW:
283 // Operand 1 is implicitly zero extended.
284 if (OpIdx == 1 && Bits >= 32)
285 break;
286 Worklist.emplace_back(UserMI, Bits);
287 break;
288
289 case RISCV::BEXTI:
 // Extracting a bit at index Bits or above would read an undemanded bit.
290 if (UserMI->getOperand(2).getImm() >= Bits)
291 return false;
292 break;
293
294 case RISCV::SB:
295 // The first argument is the value to store.
296 if (OpIdx == 0 && Bits >= 8)
297 break;
298 return false;
299 case RISCV::SH:
300 // The first argument is the value to store.
301 if (OpIdx == 0 && Bits >= 16)
302 break;
303 return false;
304 case RISCV::SW:
305 // The first argument is the value to store.
306 if (OpIdx == 0 && Bits >= 32)
307 break;
308 return false;
309
310 // For these operations the lower word of the output depends only on
311 // the lower word of the input, so we check that all uses only read the lower word.
312 case RISCV::COPY:
313 case RISCV::PHI:
314
315 case RISCV::ADD:
316 case RISCV::ADDI:
317 case RISCV::AND:
318 case RISCV::MUL:
319 case RISCV::OR:
320 case RISCV::SUB:
321 case RISCV::XOR:
322 case RISCV::XORI:
323
324 case RISCV::ANDN:
325 case RISCV::CLMUL:
326 case RISCV::ORN:
327 case RISCV::SH1ADD:
328 case RISCV::SH2ADD:
329 case RISCV::SH3ADD:
330 case RISCV::XNOR:
331 case RISCV::BSETI:
332 case RISCV::BCLRI:
333 case RISCV::BINVI:
334 Worklist.emplace_back(UserMI, Bits);
335 break;
336
337 case RISCV::BREV8:
338 case RISCV::ORC_B:
339 // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
340 Worklist.emplace_back(UserMI, alignDown(Bits, 8));
341 break;
342
343 case RISCV::PseudoCCMOVGPR:
344 case RISCV::PseudoCCMOVGPRNoX0:
345 // Either operand 4 or operand 5 is returned by this instruction. If
346 // only the lower word of the result is used, then only the lower word
347 // of operand 4 and 5 is used.
348 if (OpIdx != 4 && OpIdx != 5)
349 return false;
350 Worklist.emplace_back(UserMI, Bits);
351 break;
352
353 case RISCV::CZERO_EQZ:
354 case RISCV::CZERO_NEZ:
355 case RISCV::VT_MASKC:
356 case RISCV::VT_MASKCN:
 // Only a use as operand 1 is forwarded; a use as any other operand is
 // rejected.
357 if (OpIdx != 1)
358 return false;
359 Worklist.emplace_back(UserMI, Bits);
360 break;
361 case RISCV::TH_EXT:
362 case RISCV::TH_EXTU:
 // The extracted field [Msb:Lsb] must lie entirely below Bits.
363 unsigned Msb = UserMI->getOperand(2).getImm();
364 unsigned Lsb = UserMI->getOperand(3).getImm();
365 // Behavior of Msb < Lsb is not well documented.
366 if (Msb >= Lsb && Bits > Msb)
367 break;
368 return false;
369 }
370 }
371 }
372
 // Every transitive user was accepted.
373 return true;
374}
375
376static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST,
377 const MachineRegisterInfo &MRI) {
378 return hasAllNBitUsers(OrigMI, ST, MRI, 32);
379}
380
381// This function returns true if the machine instruction always outputs a value
382// where bits 63:32 match bit 31.
// \p OpNo is the index of the def operand being queried; in the code visible
// here it is only consulted for PseudoAtomicLoadNand32.
383static bool isSignExtendingOpW(const MachineInstr &MI, unsigned OpNo) {
384 uint64_t TSFlags = MI.getDesc().TSFlags;
385
386 // Instructions that can be determined from opcode are marked in tablegen.
 // NOTE(review): the condition guarding this return (listing line 387) is
 // absent from this extract; presumably it tests a bit in TSFlags - confirm.
388 return true;
389
390 // Special cases that require checking operands.
391 switch (MI.getOpcode()) {
392 // shifting right sufficiently makes the value 32-bit sign-extended
393 case RISCV::SRAI:
394 return MI.getOperand(2).getImm() >= 32;
 // Strictly greater than 32: a logical shift by exactly 32 can leave bit 31
 // set while bits 63:32 are zero.
395 case RISCV::SRLI:
396 return MI.getOperand(2).getImm() > 32;
397 // The LI pattern ADDI rd, X0, imm is sign extended.
398 case RISCV::ADDI:
399 return MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0;
400 // An ANDI with an 11 bit immediate will zero bits 63:11.
401 case RISCV::ANDI:
402 return isUInt<11>(MI.getOperand(2).getImm());
403 // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
404 case RISCV::ORI:
405 return !isUInt<11>(MI.getOperand(2).getImm());
406 // A bseti with X0 is sign extended if the immediate is less than 31.
407 case RISCV::BSETI:
408 return MI.getOperand(2).getImm() < 31 &&
409 MI.getOperand(1).getReg() == RISCV::X0;
410 // Copying from X0 produces zero.
411 case RISCV::COPY:
412 return MI.getOperand(1).getReg() == RISCV::X0;
413 // Ignore the scratch register destination.
414 case RISCV::PseudoAtomicLoadNand32:
415 return OpNo == 0;
416 case RISCV::PseudoVMV_X_S: {
417 // vmv.x.s has at least 33 sign bits if log2(sew) <= 5.
418 int64_t Log2SEW = MI.getOperand(2).getImm();
419 assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW");
420 return Log2SEW <= 5;
421 }
 // A signed field of at most 32 bits is accepted.
422 case RISCV::TH_EXT: {
423 unsigned Msb = MI.getOperand(2).getImm();
424 unsigned Lsb = MI.getOperand(3).getImm();
425 return Msb >= Lsb && (Msb - Lsb + 1) <= 32;
426 }
 // An unsigned field must be strictly narrower than 32 bits so that bit 31
 // of the result is zero.
427 case RISCV::TH_EXTU: {
428 unsigned Msb = MI.getOperand(2).getImm();
429 unsigned Lsb = MI.getOperand(3).getImm();
430 return Msb >= Lsb && (Msb - Lsb + 1) < 32;
431 }
432 }
433
434 return false;
435}
436
// Decides whether the value in \p SrcReg is sign extended from 32 bits by
// walking the defining instructions with a worklist of registers. Definitions
// that could be made sign extending by switching to their W variant are
// recorded in FixableDef instead of failing the query.
// NOTE(review): the rest of the signature and the Worklist declaration
// (listing lines 438-439, 441) are absent from this extract.
437static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
440 SmallSet<Register, 4> Visited;
442
 // Queues a register for inspection; physical registers cannot be traced.
443 auto AddRegToWorkList = [&](Register SrcReg) {
444 if (!SrcReg.isVirtual())
445 return false;
446 Worklist.push_back(SrcReg);
447 return true;
448 };
449
450 if (!AddRegToWorkList(SrcReg))
451 return false;
452
453 while (!Worklist.empty()) {
454 Register Reg = Worklist.pop_back_val();
455
456 // If we already visited this register, we don't need to check it again.
457 if (!Visited.insert(Reg).second)
458 continue;
459
 // Registers without a defining instruction are skipped.
460 MachineInstr *MI = MRI.getVRegDef(Reg);
461 if (!MI)
462 continue;
463
464 int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
465 assert(OpNo != -1 && "Couldn't find register");
466
467 // If this is a sign extending operation we don't need to look any further.
468 if (isSignExtendingOpW(*MI, OpNo))
469 continue;
470
471 // Is this an instruction that propagates sign extend?
472 switch (MI->getOpcode()) {
473 default:
474 // Unknown opcode, give up.
475 return false;
476 case RISCV::COPY: {
477 const MachineFunction *MF = MI->getMF();
 // NOTE(review): the initializer of RVFI (listing line 479) is absent from
 // this extract.
478 const RISCVMachineFunctionInfo *RVFI =
480
481 // If this is the entry block and the register is livein, see if we know
482 // it is sign extended.
483 if (MI->getParent() == &MF->front()) {
484 Register VReg = MI->getOperand(0).getReg();
485 if (MF->getRegInfo().isLiveIn(VReg) && RVFI->isSExt32Register(VReg))
486 continue;
487 }
488
489 Register CopySrcReg = MI->getOperand(1).getReg();
490 if (CopySrcReg == RISCV::X10) {
491 // For a method return value, we check the ZExt/SExt flags in attribute.
492 // We assume the following code sequence for method call.
493 // PseudoCALL @bar, ...
494 // ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
495 // %0:gpr = COPY $x10
496 //
497 // We use the PseudoCall to look up the IR function being called to find
498 // its return attributes.
499 const MachineBasicBlock *MBB = MI->getParent();
500 auto II = MI->getIterator();
501 if (II == MBB->instr_begin() ||
502 (--II)->getOpcode() != RISCV::ADJCALLSTACKUP)
503 return false;
504
505 const MachineInstr &CallMI = *(--II);
506 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
507 return false;
508
 // NOTE(review): the initializer of CalleeFn (listing line 510) is absent
 // from this extract.
509 auto *CalleeFn =
511 if (!CalleeFn)
512 return false;
513
514 auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
515 if (!IntTy)
516 return false;
517
 // A signext return of at most 32 bits, or a zeroext return narrower
 // than 32 bits, is accepted as already sign extended.
518 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
519 unsigned BitWidth = IntTy->getBitWidth();
520 if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
521 (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
522 continue;
523 }
524
 // Otherwise the copy is sign extended iff its source is.
525 if (!AddRegToWorkList(CopySrcReg))
526 return false;
527
528 break;
529 }
530
531 // For these, we just need to check if the 1st operand is sign extended.
532 case RISCV::BCLRI:
533 case RISCV::BINVI:
534 case RISCV::BSETI:
 // Bit indices of 31 and above are rejected.
535 if (MI->getOperand(2).getImm() >= 31)
536 return false;
537 [[fallthrough]];
538 case RISCV::REM:
539 case RISCV::ANDI:
540 case RISCV::ORI:
541 case RISCV::XORI:
542 case RISCV::SRAI:
543 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
544 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
545 // Logical operations use a sign extended 12-bit immediate.
546 // Arithmetic shift right can only increase the number of sign bits.
547 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
548 return false;
549
550 break;
551 case RISCV::PseudoCCADDW:
552 case RISCV::PseudoCCADDIW:
553 case RISCV::PseudoCCSUBW:
554 case RISCV::PseudoCCSLLW:
555 case RISCV::PseudoCCSRLW:
556 case RISCV::PseudoCCSRAW:
557 case RISCV::PseudoCCSLLIW:
558 case RISCV::PseudoCCSRLIW:
559 case RISCV::PseudoCCSRAIW:
560 // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only
561 // need to check if operand 4 is sign extended.
562 if (!AddRegToWorkList(MI->getOperand(4).getReg()))
563 return false;
564 break;
565 case RISCV::REMU:
566 case RISCV::AND:
567 case RISCV::OR:
568 case RISCV::XOR:
569 case RISCV::ANDN:
570 case RISCV::ORN:
571 case RISCV::XNOR:
572 case RISCV::MAX:
573 case RISCV::MAXU:
574 case RISCV::MIN:
575 case RISCV::MINU:
576 case RISCV::PseudoCCMOVGPR:
577 case RISCV::PseudoCCMOVGPRNoX0:
578 case RISCV::PseudoCCAND:
579 case RISCV::PseudoCCOR:
580 case RISCV::PseudoCCXOR:
581 case RISCV::PseudoCCANDN:
582 case RISCV::PseudoCCORN:
583 case RISCV::PseudoCCXNOR:
584 case RISCV::PHI: {
585 // If all incoming values are sign-extended, the output of AND, OR, XOR,
586 // MIN, MAX, or PHI is also sign-extended.
587
588 // The input registers for PHI are operand 1, 3, ...
589 // The input registers for PseudoCCMOVGPR(NoX0) are 4 and 5.
590 // The input registers for PseudoCCAND/OR/XOR are 4, 5, and 6.
591 // The input registers for others are operand 1 and 2.
 // B = first input operand, E = one past the last, D = stride.
592 unsigned B = 1, E = 3, D = 1;
593 switch (MI->getOpcode()) {
594 case RISCV::PHI:
595 E = MI->getNumOperands();
596 D = 2;
597 break;
598 case RISCV::PseudoCCMOVGPR:
599 case RISCV::PseudoCCMOVGPRNoX0:
600 B = 4;
601 E = 6;
602 break;
603 case RISCV::PseudoCCAND:
604 case RISCV::PseudoCCOR:
605 case RISCV::PseudoCCXOR:
606 case RISCV::PseudoCCANDN:
607 case RISCV::PseudoCCORN:
608 case RISCV::PseudoCCXNOR:
609 B = 4;
610 E = 7;
611 break;
612 }
613
 // Every input must be a traceable virtual register.
614 for (unsigned I = B; I != E; I += D) {
615 if (!MI->getOperand(I).isReg())
616 return false;
617
618 if (!AddRegToWorkList(MI->getOperand(I).getReg()))
619 return false;
620 }
621
622 break;
623 }
624
625 case RISCV::CZERO_EQZ:
626 case RISCV::CZERO_NEZ:
627 case RISCV::VT_MASKC:
628 case RISCV::VT_MASKCN:
629 // Instructions return zero or operand 1. Result is sign extended if
630 // operand 1 is sign extended.
631 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
632 return false;
633 break;
634
635 case RISCV::ADDI: {
 // LUI+ADDI pair: if the combined constant fits in a signed 32-bit value
 // the result is already sign extended.
636 if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg().isVirtual()) {
637 if (MachineInstr *SrcMI = MRI.getVRegDef(MI->getOperand(1).getReg())) {
638 if (SrcMI->getOpcode() == RISCV::LUI &&
639 SrcMI->getOperand(1).isImm()) {
640 uint64_t Imm = SrcMI->getOperand(1).getImm();
641 Imm = SignExtend64<32>(Imm << 12);
642 Imm += (uint64_t)MI->getOperand(2).getImm();
643 if (isInt<32>(Imm))
644 continue;
645 }
646 }
647 }
648
 // Otherwise the ADDI is recorded as fixable when all of its users only
 // read the low word.
649 if (hasAllWUsers(*MI, ST, MRI)) {
650 FixableDef.insert(MI);
651 break;
652 }
653 return false;
654 }
655
656 // With these opcode, we can "fix" them with the W-version
657 // if we know all users of the result only rely on bits 31:0
658 case RISCV::SLLI:
659 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
660 if (MI->getOperand(2).getImm() >= 32)
661 return false;
662 [[fallthrough]];
663 case RISCV::ADD:
664 case RISCV::LD:
665 case RISCV::LWU:
666 case RISCV::MUL:
667 case RISCV::SUB:
668 if (hasAllWUsers(*MI, ST, MRI)) {
669 FixableDef.insert(MI);
670 break;
671 }
672 return false;
673 }
674 }
675
676 // If we get here, then every node we visited produces a sign extended value
677 // or propagated sign extended values. So the result must be sign extended.
678 return true;
679}
680
681static unsigned getWOp(unsigned Opcode) {
682 switch (Opcode) {
683 case RISCV::ADDI:
684 return RISCV::ADDIW;
685 case RISCV::ADD:
686 return RISCV::ADDW;
687 case RISCV::LD:
688 case RISCV::LWU:
689 return RISCV::LW;
690 case RISCV::MUL:
691 return RISCV::MULW;
692 case RISCV::SLLI:
693 return RISCV::SLLIW;
694 case RISCV::SUB:
695 return RISCV::SUBW;
696 default:
697 llvm_unreachable("Unexpected opcode for replacement with W variant");
698 }
699}
700
// Erases every sext.w whose result is provably redundant, either because all
// users only read the low word or because the source is already sign extended.
// Definitions collected as fixable by isSignExtendedW are rewritten to their W
// form first. Returns true if any sext.w was removed.
// NOTE(review): the end of the signature and the condition guarding the early
// return (listing lines 704-705) are absent from this extract.
701bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
702 const RISCVInstrInfo &TII,
703 const RISCVSubtarget &ST,
706 return false;
707
708 bool MadeChange = false;
709 for (MachineBasicBlock &MBB : MF) {
 // Early-increment iteration: MI may be erased inside the loop body.
710 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
711 // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
712 if (!RISCVInstrInfo::isSEXT_W(MI))
713 continue;
714
715 Register SrcReg = MI.getOperand(1).getReg();
716
717 SmallPtrSet<MachineInstr *, 4> FixableDefs;
718
719 // If all users only use the lower bits, this sext.w is redundant.
720 // Or if all definitions reaching MI sign-extend their output,
721 // then sext.w is redundant.
722 if (!hasAllWUsers(MI, ST, MRI) &&
723 !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
724 continue;
725
 // SrcReg is about to replace DstReg, so it must be constrainable to
 // DstReg's register class; otherwise the sext.w is kept.
726 Register DstReg = MI.getOperand(0).getReg();
727 if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
728 continue;
729
730 // Convert Fixable instructions to their W versions.
731 for (MachineInstr *Fixable : FixableDefs) {
732 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
733 Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
 // The wrap/exact flags are dropped together with the opcode change.
734 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
735 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
736 Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
737 LLVM_DEBUG(dbgs() << " with " << *Fixable);
738 ++NumTransformedToWInstrs;
739 }
740
741 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
 // Forward the source register to all users; SrcReg's kill flags are
 // cleared because it now has additional uses.
742 MRI.replaceRegWith(DstReg, SrcReg);
743 MRI.clearKillFlags(SrcReg);
744 MI.eraseFromParent();
745 ++NumRemovedSExtW;
746 MadeChange = true;
747 }
748 }
749
750 return MadeChange;
751}
752
753// Strips or adds W suffixes to eligible instructions depending on the
754// subtarget preferences.
755bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF,
756 const RISCVInstrInfo &TII,
757 const RISCVSubtarget &ST,
758 MachineRegisterInfo &MRI) {
759 bool ShouldStripW = !(DisableStripWSuffix || ST.preferWInst());
760 bool ShouldPreferW = ST.preferWInst();
761 bool MadeChange = false;
762
763 for (MachineBasicBlock &MBB : MF) {
764 for (MachineInstr &MI : MBB) {
765 std::optional<unsigned> WOpc;
766 std::optional<unsigned> NonWOpc;
767 unsigned OrigOpc = MI.getOpcode();
768 switch (OrigOpc) {
769 default:
770 continue;
771 case RISCV::ADDW:
772 NonWOpc = RISCV::ADD;
773 break;
774 case RISCV::ADDIW:
775 NonWOpc = RISCV::ADDI;
776 break;
777 case RISCV::MULW:
778 NonWOpc = RISCV::MUL;
779 break;
780 case RISCV::SLLIW:
781 NonWOpc = RISCV::SLLI;
782 break;
783 case RISCV::SUBW:
784 NonWOpc = RISCV::SUB;
785 break;
786 case RISCV::ADD:
787 WOpc = RISCV::ADDW;
788 break;
789 case RISCV::ADDI:
790 WOpc = RISCV::ADDIW;
791 break;
792 case RISCV::SUB:
793 WOpc = RISCV::SUBW;
794 break;
795 case RISCV::MUL:
796 WOpc = RISCV::MULW;
797 break;
798 case RISCV::SLLI:
799 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits.
800 if (MI.getOperand(2).getImm() >= 32)
801 continue;
802 WOpc = RISCV::SLLIW;
803 break;
804 case RISCV::LD:
805 case RISCV::LWU:
806 WOpc = RISCV::LW;
807 break;
808 }
809
810 if (ShouldStripW && NonWOpc.has_value() && hasAllWUsers(MI, ST, MRI)) {
811 LLVM_DEBUG(dbgs() << "Replacing " << MI);
812 MI.setDesc(TII.get(NonWOpc.value()));
813 LLVM_DEBUG(dbgs() << " with " << MI);
814 ++NumTransformedToNonWInstrs;
815 MadeChange = true;
816 continue;
817 }
818 // LWU is always converted to LW when possible as 1) LW is compressible
819 // and 2) it helps minimise differences vs RV32.
820 if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() &&
821 hasAllWUsers(MI, ST, MRI)) {
822 LLVM_DEBUG(dbgs() << "Replacing " << MI);
823 MI.setDesc(TII.get(WOpc.value()));
824 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
825 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
826 MI.clearFlag(MachineInstr::MIFlag::IsExact);
827 LLVM_DEBUG(dbgs() << " with " << MI);
828 ++NumTransformedToWInstrs;
829 MadeChange = true;
830 continue;
831 }
832 }
833 }
834 return MadeChange;
835}
836
837bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
838 if (skipFunction(MF.getFunction()))
839 return false;
840
841 MachineRegisterInfo &MRI = MF.getRegInfo();
842 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
843 const RISCVInstrInfo &TII = *ST.getInstrInfo();
844
845 if (!ST.is64Bit())
846 return false;
847
848 bool MadeChange = false;
849 MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
850 MadeChange |= canonicalizeWSuffixes(MF, TII, ST, MRI);
851 return MadeChange;
852}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< bool > DisableSExtWRemoval("loongarch-disable-sextw-removal", cl::desc("Disable removal of sign-extend insn"), cl::init(false), cl::Hidden)
static bool hasAllWUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
static unsigned getWOp(unsigned Opcode)
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendingOpW(const MachineInstr &MI, unsigned OpNo)
static cl::opt< bool > DisableStripWSuffix("riscv-disable-strip-w-suffix", cl::desc("Disable strip W suffix"), cl::init(false), cl::Hidden)
static bool hasAllNBitUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI, unsigned OrigBits)
#define RISCV_OPT_W_INSTRS_NAME
static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp, unsigned Bits)
static cl::opt< bool > DisableSExtWRemoval("riscv-disable-sextw-removal", cl::desc("Disable removal of sext.w"), cl::init(false), cl::Hidden)
static unsigned getWOp(unsigned Opcode)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Describe properties that are true of each instruction in the target description file.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool isLiveIn(Register Reg) const
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
FunctionPass * createRISCVOptWInstrsPass()
constexpr unsigned BitWidth
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572