1//===-------------- RISCVVLOptimizer.cpp - VL Optimizer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass reduces the VL where possible at the MI level, before VSETVLI
10// instructions are inserted.
11//
12// The purpose of this optimization is to make the VL argument, for instructions
13// that have a VL argument, as small as possible.
14//
15// This is split into a sparse dataflow analysis where we determine what VL is
16// demanded by each instruction first, and then afterwards try to reduce the VL
17// of each instruction if it demands less than its VL operand.
18//
19// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
20// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
21// Optimizer", which is available on YouTube at
22// https://www.youtube.com/watch?v=Mfb5fRSdJAc
23//
24// The slides for the talk are available at
25// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
26//
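// As an illustrative sketch (schematic MIR, not taken from an actual test
// case), consider a value produced at VLMAX whose only user stores just two
// elements:
//
//   %a = PseudoVADD_VV_M1 $noreg, %x, %y, -1, 5, 0   ; AVL = -1 (VLMAX)
//   PseudoVSE32_V_M1 killed %a, %p, 2, 5             ; only 2 elements stored
//
// The backward analysis records that only 2 elements of %a are demanded, so
// the AVL operand of the PseudoVADD_VV_M1 can be shrunk from -1 (VLMAX) to 2.
// The operand lists above are schematic; see the pseudo definitions for the
// exact order.
//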
27//===---------------------------------------------------------------------===//
28
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
31#include "llvm/ADT/PostOrderIterator.h"
32#include "llvm/ADT/SetVector.h"
33#include "llvm/CodeGen/MachineDominators.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "riscv-vl-optimizer"
39#define PASS_NAME "RISC-V VL Optimizer"
40
41namespace {
42
43/// Wrapper around MachineOperand that defaults to immediate 0.
44struct DemandedVL {
45 MachineOperand VL;
46 DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
47 DemandedVL(MachineOperand VL) : VL(VL) {}
48 static DemandedVL vlmax() {
49 return DemandedVL(MachineOperand::CreateImm(RISCV::VLMaxSentinel));
50 }
51 bool operator!=(const DemandedVL &Other) const {
52 return !VL.isIdenticalTo(Other.VL);
53 }
54
55 DemandedVL max(const DemandedVL &X) const {
56 if (RISCV::isVLKnownLE(VL, X.VL))
57 return X;
58 if (RISCV::isVLKnownLE(X.VL, VL))
59 return *this;
60 return DemandedVL::vlmax();
61 }
62};
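// Illustrative behaviour of DemandedVL::max(), which acts as the join of the
// analysis lattice (the operands below are assumed for exposition only):
//   - joining imm 2 with the default imm 0 yields imm 2;
//   - joining imm 2 with imm 4 yields imm 4 (the larger known VL);
//   - joining two unrelated virtual-register VLs, where neither is known to
//     be <= the other, conservatively widens to DemandedVL::vlmax().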
63
64class RISCVVLOptimizer : public MachineFunctionPass {
65 const MachineRegisterInfo *MRI;
66 const MachineDominatorTree *MDT;
67 const TargetInstrInfo *TII;
68
69public:
70 static char ID;
71
72 RISCVVLOptimizer() : MachineFunctionPass(ID) {}
73
74 bool runOnMachineFunction(MachineFunction &MF) override;
75
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
77 AU.setPreservesCFG();
78 AU.addRequired<MachineDominatorTreeWrapperPass>();
79 MachineFunctionPass::getAnalysisUsage(AU);
80 }
81
82 StringRef getPassName() const override { return PASS_NAME; }
83
84private:
85 DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
86 /// Returns true if the users of \p MI have compatible EEWs and SEWs.
87 bool checkUsers(const MachineInstr &MI) const;
88 bool tryReduceVL(MachineInstr &MI) const;
89 bool isCandidate(const MachineInstr &MI) const;
90 void transfer(const MachineInstr &MI);
91
92 /// For a given instruction, records what elements of it are demanded by
93 /// downstream users.
94 DenseMap<const MachineInstr *, DemandedVL> DemandedVLs;
95 SetVector<const MachineInstr *> Worklist;
96
97 /// \returns all vector virtual registers that \p MI uses.
98 auto virtual_vec_uses(const MachineInstr &MI) const {
99 return make_filter_range(MI.uses(), [this](const MachineOperand &MO) {
100 return MO.isReg() && MO.getReg().isVirtual() &&
101 RISCVRegisterInfo::isRVVRegClass(MRI->getRegClass(MO.getReg()));
102 });
103 }
104};
105
106/// Represents the EMUL and EEW of a MachineOperand.
107struct OperandInfo {
108 // Represent as 1,2,4,8, ... and fractional indicator. This is because
109 // EMUL can take on values that don't map to RISCVVType::VLMUL values exactly.
110 // For example, a mask operand can have an EMUL less than MF8.
111 // If nullopt, then EMUL isn't used (i.e. only a single scalar is read).
112 std::optional<std::pair<unsigned, bool>> EMUL;
113
114 unsigned Log2EEW;
115
116 OperandInfo(RISCVVType::VLMUL EMUL, unsigned Log2EEW)
117 : EMUL(RISCVVType::decodeVLMUL(EMUL)), Log2EEW(Log2EEW) {}
118
119 OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
120 : EMUL(EMUL), Log2EEW(Log2EEW) {}
121
122 OperandInfo(unsigned Log2EEW) : Log2EEW(Log2EEW) {}
123
124 OperandInfo() = delete;
125
126 /// Return true if the EMUL and EEW produced by \p Def are compatible with the
127 /// EMUL and EEW used by \p User.
128 static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) {
129 if (Def.Log2EEW != User.Log2EEW)
130 return false;
131 if (User.EMUL && Def.EMUL != User.EMUL)
132 return false;
133 return true;
134 }
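  // For example (illustrative values): a VADD_VV definition with SEW=32 and
  // LMUL=m2 produces {EMUL=m2, EEW=32}. A reduction that reads that register
  // as a scalar source has {EMUL=nullopt, EEW=32}, which is compatible since
  // the user places no constraint on EMUL; a user expecting EEW=16 would not
  // be compatible.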
135
136 void print(raw_ostream &OS) const {
137 if (EMUL) {
138 OS << "EMUL: m";
139 if (EMUL->second)
140 OS << "f";
141 OS << EMUL->first;
142 } else
143 OS << "EMUL: none";
144 OS << ", EEW: " << (1 << Log2EEW);
145 }
146};
147
148} // end anonymous namespace
149
150char RISCVVLOptimizer::ID = 0;
151INITIALIZE_PASS_BEGIN(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
152INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
153INITIALIZE_PASS_END(RISCVVLOptimizer, DEBUG_TYPE, PASS_NAME, false, false)
154
155FunctionPass *llvm::createRISCVVLOptimizerPass() {
156 return new RISCVVLOptimizer();
157}
158
159LLVM_ATTRIBUTE_UNUSED
160static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
161 OI.print(OS);
162 return OS;
163}
164
165LLVM_ATTRIBUTE_UNUSED
166static raw_ostream &operator<<(raw_ostream &OS,
167 const std::optional<OperandInfo> &OI) {
168 if (OI)
169 OI->print(OS);
170 else
171 OS << "nullopt";
172 return OS;
173}
174
175/// Return EMUL = (EEW / SEW) * LMUL where EEW comes from Log2EEW and LMUL and
176/// SEW are from the TSFlags of MI.
177static std::pair<unsigned, bool>
178getEMULEqualsEEWDivSEWTimesLMUL(unsigned Log2EEW, const MachineInstr &MI) {
179 RISCVVType::VLMUL MIVLMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
180 auto [MILMUL, MILMULIsFractional] = RISCVVType::decodeVLMUL(MIVLMUL);
181 unsigned MILog2SEW =
182 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
183
184 // Mask instructions will have 0 as the SEW operand. But the LMUL of these
185 // instructions is calculated as if the SEW operand were 3 (e8).
186 if (MILog2SEW == 0)
187 MILog2SEW = 3;
188
189 unsigned MISEW = 1 << MILog2SEW;
190
191 unsigned EEW = 1 << Log2EEW;
192 // Calculate (EEW/SEW)*LMUL preserving fractions less than 1. Use GCD
193 // to put fraction in simplest form.
194 unsigned Num = EEW, Denom = MISEW;
195 int GCD = MILMULIsFractional ? std::gcd(Num, Denom * MILMUL)
196 : std::gcd(Num * MILMUL, Denom);
197 Num = MILMULIsFractional ? Num / GCD : Num * MILMUL / GCD;
198 Denom = MILMULIsFractional ? Denom * MILMUL / GCD : Denom / GCD;
199 return std::make_pair(Num > Denom ? Num : Denom, Denom > Num);
200}
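// Worked example (illustrative values): with LMUL=m2 and SEW=32, a mask
// operand (EEW=1, Log2EEW=0) gives EMUL = (EEW/SEW)*LMUL = (1/32)*2 = 1/16,
// i.e. the pair {16, true}, an EMUL smaller than MF8 that has no
// RISCVVType::VLMUL encoding. With LMUL=m1, SEW=32 and EEW=8 the result is
// (8/32)*1 = 1/4, i.e. {4, true} (mf4).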
201
202/// Dest has EEW=SEW. Source EEW=SEW/Factor (i.e. VF2 => EEW=SEW/2).
203/// SEW comes from TSFlags of MI.
204static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
205 const MachineInstr &MI,
206 const MachineOperand &MO) {
207 unsigned MILog2SEW =
208 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
209
210 if (MO.getOperandNo() == 0)
211 return MILog2SEW;
212
213 unsigned MISEW = 1 << MILog2SEW;
214 unsigned EEW = MISEW / Factor;
215 unsigned Log2EEW = Log2_32(EEW);
216
217 return Log2EEW;
218}
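// For example (illustrative values): for a vzext.vf4 pseudo with SEW=32
// (MILog2SEW=5), the destination (operand 0) keeps Log2EEW=5 (EEW=32), while
// the source operand gets EEW = 32/4 = 8, i.e. Log2EEW=3.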
219
220#define VSEG_CASES(Prefix, EEW) \
221 RISCV::Prefix##SEG2E##EEW##_V: \
222 case RISCV::Prefix##SEG3E##EEW##_V: \
223 case RISCV::Prefix##SEG4E##EEW##_V: \
224 case RISCV::Prefix##SEG5E##EEW##_V: \
225 case RISCV::Prefix##SEG6E##EEW##_V: \
226 case RISCV::Prefix##SEG7E##EEW##_V: \
227 case RISCV::Prefix##SEG8E##EEW##_V
228#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
229#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
230#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
231#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
232
233static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) {
234 const MachineInstr &MI = *MO.getParent();
235 const MCInstrDesc &Desc = MI.getDesc();
236 const RISCVVPseudosTable::PseudoInfo *RVV =
237 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
238 assert(RVV && "Could not find MI in PseudoTable");
239
240 // MI has a SEW associated with it. The RVV specification defines
241 // the EEW of each operand and definition in relation to MI.SEW.
242 unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
243
244 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
245 const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags);
246
247 bool IsMODef = MO.getOperandNo() == 0 ||
248 (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs());
249
250 // All mask operands have EEW=1
251 const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()];
252 if (Info.OperandType == MCOI::OPERAND_REGISTER &&
253 Info.RegClass == RISCV::VMV0RegClassID)
254 return 0;
255
256 // switch against BaseInstr to reduce number of cases that need to be
257 // considered.
258 switch (RVV->BaseInstr) {
259
260 // 6. Configuration-Setting Instructions
261 // Configuration setting instructions do not read or write vector registers
262 case RISCV::VSETIVLI:
263 case RISCV::VSETVL:
264 case RISCV::VSETVLI:
265 llvm_unreachable("Configuration setting instructions do not read or write "
266 "vector registers");
267
268 // Vector Loads and Stores
269 // Vector Unit-Stride Instructions
270 // Vector Strided Instructions
271 /// Dest EEW encoded in the instruction
272 case RISCV::VLM_V:
273 case RISCV::VSM_V:
274 return 0;
275 case RISCV::VLE8_V:
276 case RISCV::VSE8_V:
277 case RISCV::VLSE8_V:
278 case RISCV::VSSE8_V:
279 case VSSEG_CASES(8):
280 case VSSSEG_CASES(8):
281 return 3;
282 case RISCV::VLE16_V:
283 case RISCV::VSE16_V:
284 case RISCV::VLSE16_V:
285 case RISCV::VSSE16_V:
286 case VSSEG_CASES(16):
287 case VSSSEG_CASES(16):
288 return 4;
289 case RISCV::VLE32_V:
290 case RISCV::VSE32_V:
291 case RISCV::VLSE32_V:
292 case RISCV::VSSE32_V:
293 case VSSEG_CASES(32):
294 case VSSSEG_CASES(32):
295 return 5;
296 case RISCV::VLE64_V:
297 case RISCV::VSE64_V:
298 case RISCV::VLSE64_V:
299 case RISCV::VSSE64_V:
300 case VSSEG_CASES(64):
301 case VSSSEG_CASES(64):
302 return 6;
303
304 // Vector Indexed Instructions
305 // vs(o|u)xei<eew>.v
306 // Dest/Data (operand 0) EEW=SEW. Source EEW=<eew>.
307 case RISCV::VLUXEI8_V:
308 case RISCV::VLOXEI8_V:
309 case RISCV::VSUXEI8_V:
310 case RISCV::VSOXEI8_V:
311 case VSUXSEG_CASES(8):
312 case VSOXSEG_CASES(8): {
313 if (MO.getOperandNo() == 0)
314 return MILog2SEW;
315 return 3;
316 }
317 case RISCV::VLUXEI16_V:
318 case RISCV::VLOXEI16_V:
319 case RISCV::VSUXEI16_V:
320 case RISCV::VSOXEI16_V:
321 case VSUXSEG_CASES(16):
322 case VSOXSEG_CASES(16): {
323 if (MO.getOperandNo() == 0)
324 return MILog2SEW;
325 return 4;
326 }
327 case RISCV::VLUXEI32_V:
328 case RISCV::VLOXEI32_V:
329 case RISCV::VSUXEI32_V:
330 case RISCV::VSOXEI32_V:
331 case VSUXSEG_CASES(32):
332 case VSOXSEG_CASES(32): {
333 if (MO.getOperandNo() == 0)
334 return MILog2SEW;
335 return 5;
336 }
337 case RISCV::VLUXEI64_V:
338 case RISCV::VLOXEI64_V:
339 case RISCV::VSUXEI64_V:
340 case RISCV::VSOXEI64_V:
341 case VSUXSEG_CASES(64):
342 case VSOXSEG_CASES(64): {
343 if (MO.getOperandNo() == 0)
344 return MILog2SEW;
345 return 6;
346 }
347
348 // Vector Integer Arithmetic Instructions
349 // Vector Single-Width Integer Add and Subtract
350 case RISCV::VADD_VI:
351 case RISCV::VADD_VV:
352 case RISCV::VADD_VX:
353 case RISCV::VSUB_VV:
354 case RISCV::VSUB_VX:
355 case RISCV::VRSUB_VI:
356 case RISCV::VRSUB_VX:
357 // Vector Bitwise Logical Instructions
358 // Vector Single-Width Shift Instructions
359 // EEW=SEW.
360 case RISCV::VAND_VI:
361 case RISCV::VAND_VV:
362 case RISCV::VAND_VX:
363 case RISCV::VOR_VI:
364 case RISCV::VOR_VV:
365 case RISCV::VOR_VX:
366 case RISCV::VXOR_VI:
367 case RISCV::VXOR_VV:
368 case RISCV::VXOR_VX:
369 case RISCV::VSLL_VI:
370 case RISCV::VSLL_VV:
371 case RISCV::VSLL_VX:
372 case RISCV::VSRL_VI:
373 case RISCV::VSRL_VV:
374 case RISCV::VSRL_VX:
375 case RISCV::VSRA_VI:
376 case RISCV::VSRA_VV:
377 case RISCV::VSRA_VX:
378 // Vector Integer Min/Max Instructions
379 // EEW=SEW.
380 case RISCV::VMINU_VV:
381 case RISCV::VMINU_VX:
382 case RISCV::VMIN_VV:
383 case RISCV::VMIN_VX:
384 case RISCV::VMAXU_VV:
385 case RISCV::VMAXU_VX:
386 case RISCV::VMAX_VV:
387 case RISCV::VMAX_VX:
388 // Vector Single-Width Integer Multiply Instructions
389 // Source and Dest EEW=SEW.
390 case RISCV::VMUL_VV:
391 case RISCV::VMUL_VX:
392 case RISCV::VMULH_VV:
393 case RISCV::VMULH_VX:
394 case RISCV::VMULHU_VV:
395 case RISCV::VMULHU_VX:
396 case RISCV::VMULHSU_VV:
397 case RISCV::VMULHSU_VX:
398 // Vector Integer Divide Instructions
399 // EEW=SEW.
400 case RISCV::VDIVU_VV:
401 case RISCV::VDIVU_VX:
402 case RISCV::VDIV_VV:
403 case RISCV::VDIV_VX:
404 case RISCV::VREMU_VV:
405 case RISCV::VREMU_VX:
406 case RISCV::VREM_VV:
407 case RISCV::VREM_VX:
408 // Vector Single-Width Integer Multiply-Add Instructions
409 // EEW=SEW.
410 case RISCV::VMACC_VV:
411 case RISCV::VMACC_VX:
412 case RISCV::VNMSAC_VV:
413 case RISCV::VNMSAC_VX:
414 case RISCV::VMADD_VV:
415 case RISCV::VMADD_VX:
416 case RISCV::VNMSUB_VV:
417 case RISCV::VNMSUB_VX:
418 // Vector Integer Merge Instructions
419 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
420 // EEW=SEW, except the mask operand has EEW=1. Mask operand is handled
421 // before this switch.
422 case RISCV::VMERGE_VIM:
423 case RISCV::VMERGE_VVM:
424 case RISCV::VMERGE_VXM:
425 case RISCV::VADC_VIM:
426 case RISCV::VADC_VVM:
427 case RISCV::VADC_VXM:
428 case RISCV::VSBC_VVM:
429 case RISCV::VSBC_VXM:
430 // Vector Integer Move Instructions
431 // Vector Fixed-Point Arithmetic Instructions
432 // Vector Single-Width Saturating Add and Subtract
433 // Vector Single-Width Averaging Add and Subtract
434 // EEW=SEW.
435 case RISCV::VMV_V_I:
436 case RISCV::VMV_V_V:
437 case RISCV::VMV_V_X:
438 case RISCV::VSADDU_VI:
439 case RISCV::VSADDU_VV:
440 case RISCV::VSADDU_VX:
441 case RISCV::VSADD_VI:
442 case RISCV::VSADD_VV:
443 case RISCV::VSADD_VX:
444 case RISCV::VSSUBU_VV:
445 case RISCV::VSSUBU_VX:
446 case RISCV::VSSUB_VV:
447 case RISCV::VSSUB_VX:
448 case RISCV::VAADDU_VV:
449 case RISCV::VAADDU_VX:
450 case RISCV::VAADD_VV:
451 case RISCV::VAADD_VX:
452 case RISCV::VASUBU_VV:
453 case RISCV::VASUBU_VX:
454 case RISCV::VASUB_VV:
455 case RISCV::VASUB_VX:
456 // Vector Single-Width Fractional Multiply with Rounding and Saturation
457 // EEW=SEW. The instruction produces 2*SEW product internally but
458 // saturates to fit into SEW bits.
459 case RISCV::VSMUL_VV:
460 case RISCV::VSMUL_VX:
461 // Vector Single-Width Scaling Shift Instructions
462 // EEW=SEW.
463 case RISCV::VSSRL_VI:
464 case RISCV::VSSRL_VV:
465 case RISCV::VSSRL_VX:
466 case RISCV::VSSRA_VI:
467 case RISCV::VSSRA_VV:
468 case RISCV::VSSRA_VX:
469 // Vector Permutation Instructions
470 // Integer Scalar Move Instructions
471 // Floating-Point Scalar Move Instructions
472 // EEW=SEW.
473 case RISCV::VMV_X_S:
474 case RISCV::VMV_S_X:
475 case RISCV::VFMV_F_S:
476 case RISCV::VFMV_S_F:
477 // Vector Slide Instructions
478 // EEW=SEW.
479 case RISCV::VSLIDEUP_VI:
480 case RISCV::VSLIDEUP_VX:
481 case RISCV::VSLIDEDOWN_VI:
482 case RISCV::VSLIDEDOWN_VX:
483 case RISCV::VSLIDE1UP_VX:
484 case RISCV::VFSLIDE1UP_VF:
485 case RISCV::VSLIDE1DOWN_VX:
486 case RISCV::VFSLIDE1DOWN_VF:
487 // Vector Register Gather Instructions
488 // EEW=SEW. For mask operand, EEW=1.
489 case RISCV::VRGATHER_VI:
490 case RISCV::VRGATHER_VV:
491 case RISCV::VRGATHER_VX:
492 // Vector Element Index Instruction
493 case RISCV::VID_V:
494 // Vector Single-Width Floating-Point Add/Subtract Instructions
495 case RISCV::VFADD_VF:
496 case RISCV::VFADD_VV:
497 case RISCV::VFSUB_VF:
498 case RISCV::VFSUB_VV:
499 case RISCV::VFRSUB_VF:
500 // Vector Single-Width Floating-Point Multiply/Divide Instructions
501 case RISCV::VFMUL_VF:
502 case RISCV::VFMUL_VV:
503 case RISCV::VFDIV_VF:
504 case RISCV::VFDIV_VV:
505 case RISCV::VFRDIV_VF:
506 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
507 case RISCV::VFMACC_VV:
508 case RISCV::VFMACC_VF:
509 case RISCV::VFNMACC_VV:
510 case RISCV::VFNMACC_VF:
511 case RISCV::VFMSAC_VV:
512 case RISCV::VFMSAC_VF:
513 case RISCV::VFNMSAC_VV:
514 case RISCV::VFNMSAC_VF:
515 case RISCV::VFMADD_VV:
516 case RISCV::VFMADD_VF:
517 case RISCV::VFNMADD_VV:
518 case RISCV::VFNMADD_VF:
519 case RISCV::VFMSUB_VV:
520 case RISCV::VFMSUB_VF:
521 case RISCV::VFNMSUB_VV:
522 case RISCV::VFNMSUB_VF:
523 // Vector Floating-Point Square-Root Instruction
524 case RISCV::VFSQRT_V:
525 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
526 case RISCV::VFRSQRT7_V:
527 // Vector Floating-Point Reciprocal Estimate Instruction
528 case RISCV::VFREC7_V:
529 // Vector Floating-Point MIN/MAX Instructions
530 case RISCV::VFMIN_VF:
531 case RISCV::VFMIN_VV:
532 case RISCV::VFMAX_VF:
533 case RISCV::VFMAX_VV:
534 // Vector Floating-Point Sign-Injection Instructions
535 case RISCV::VFSGNJ_VF:
536 case RISCV::VFSGNJ_VV:
537 case RISCV::VFSGNJN_VV:
538 case RISCV::VFSGNJN_VF:
539 case RISCV::VFSGNJX_VF:
540 case RISCV::VFSGNJX_VV:
541 // Vector Floating-Point Classify Instruction
542 case RISCV::VFCLASS_V:
543 // Vector Floating-Point Move Instruction
544 case RISCV::VFMV_V_F:
545 // Single-Width Floating-Point/Integer Type-Convert Instructions
546 case RISCV::VFCVT_XU_F_V:
547 case RISCV::VFCVT_X_F_V:
548 case RISCV::VFCVT_RTZ_XU_F_V:
549 case RISCV::VFCVT_RTZ_X_F_V:
550 case RISCV::VFCVT_F_XU_V:
551 case RISCV::VFCVT_F_X_V:
552 // Vector Floating-Point Merge Instruction
553 case RISCV::VFMERGE_VFM:
554 // Vector count population in mask vcpop.m
555 // vfirst find-first-set mask bit
556 case RISCV::VCPOP_M:
557 case RISCV::VFIRST_M:
558 // Vector Bit-manipulation Instructions (Zvbb)
559 // Vector And-Not
560 case RISCV::VANDN_VV:
561 case RISCV::VANDN_VX:
562 // Vector Reverse Bits in Elements
563 case RISCV::VBREV_V:
564 // Vector Reverse Bits in Bytes
565 case RISCV::VBREV8_V:
566 // Vector Reverse Bytes
567 case RISCV::VREV8_V:
568 // Vector Count Leading Zeros
569 case RISCV::VCLZ_V:
570 // Vector Count Trailing Zeros
571 case RISCV::VCTZ_V:
572 // Vector Population Count
573 case RISCV::VCPOP_V:
574 // Vector Rotate Left
575 case RISCV::VROL_VV:
576 case RISCV::VROL_VX:
577 // Vector Rotate Right
578 case RISCV::VROR_VI:
579 case RISCV::VROR_VV:
580 case RISCV::VROR_VX:
581 // Vector Carry-less Multiplication Instructions (Zvbc)
582 // Vector Carry-less Multiply
583 case RISCV::VCLMUL_VV:
584 case RISCV::VCLMUL_VX:
585 // Vector Carry-less Multiply Return High Half
586 case RISCV::VCLMULH_VV:
587 case RISCV::VCLMULH_VX:
588 return MILog2SEW;
589
590 // Vector Widening Shift Left Logical (Zvbb)
591 case RISCV::VWSLL_VI:
592 case RISCV::VWSLL_VX:
593 case RISCV::VWSLL_VV:
594 // Vector Widening Integer Add/Subtract
595 // Def uses EEW=2*SEW. Operands use EEW=SEW.
596 case RISCV::VWADDU_VV:
597 case RISCV::VWADDU_VX:
598 case RISCV::VWSUBU_VV:
599 case RISCV::VWSUBU_VX:
600 case RISCV::VWADD_VV:
601 case RISCV::VWADD_VX:
602 case RISCV::VWSUB_VV:
603 case RISCV::VWSUB_VX:
604 // Vector Widening Integer Multiply Instructions
605 // Destination EEW=2*SEW. Source EEW=SEW.
606 case RISCV::VWMUL_VV:
607 case RISCV::VWMUL_VX:
608 case RISCV::VWMULSU_VV:
609 case RISCV::VWMULSU_VX:
610 case RISCV::VWMULU_VV:
611 case RISCV::VWMULU_VX:
612 // Vector Widening Integer Multiply-Add Instructions
613 // Destination EEW=2*SEW. Source EEW=SEW.
614 // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
615 // is then added to the 2*SEW-bit Dest. These instructions never have a
616 // passthru operand.
617 case RISCV::VWMACCU_VV:
618 case RISCV::VWMACCU_VX:
619 case RISCV::VWMACC_VV:
620 case RISCV::VWMACC_VX:
621 case RISCV::VWMACCSU_VV:
622 case RISCV::VWMACCSU_VX:
623 case RISCV::VWMACCUS_VX:
624 // Vector Widening Floating-Point Fused Multiply-Add Instructions
625 case RISCV::VFWMACC_VF:
626 case RISCV::VFWMACC_VV:
627 case RISCV::VFWNMACC_VF:
628 case RISCV::VFWNMACC_VV:
629 case RISCV::VFWMSAC_VF:
630 case RISCV::VFWMSAC_VV:
631 case RISCV::VFWNMSAC_VF:
632 case RISCV::VFWNMSAC_VV:
633 case RISCV::VFWMACCBF16_VV:
634 case RISCV::VFWMACCBF16_VF:
635 // Vector Widening Floating-Point Add/Subtract Instructions
636 // Dest EEW=2*SEW. Source EEW=SEW.
637 case RISCV::VFWADD_VV:
638 case RISCV::VFWADD_VF:
639 case RISCV::VFWSUB_VV:
640 case RISCV::VFWSUB_VF:
641 // Vector Widening Floating-Point Multiply
642 case RISCV::VFWMUL_VF:
643 case RISCV::VFWMUL_VV:
644 // Widening Floating-Point/Integer Type-Convert Instructions
645 case RISCV::VFWCVT_XU_F_V:
646 case RISCV::VFWCVT_X_F_V:
647 case RISCV::VFWCVT_RTZ_XU_F_V:
648 case RISCV::VFWCVT_RTZ_X_F_V:
649 case RISCV::VFWCVT_F_XU_V:
650 case RISCV::VFWCVT_F_X_V:
651 case RISCV::VFWCVT_F_F_V:
652 case RISCV::VFWCVTBF16_F_F_V:
653 return IsMODef ? MILog2SEW + 1 : MILog2SEW;
654
655 // Def and Op1 use EEW=2*SEW. Op2 uses EEW=SEW.
656 case RISCV::VWADDU_WV:
657 case RISCV::VWADDU_WX:
658 case RISCV::VWSUBU_WV:
659 case RISCV::VWSUBU_WX:
660 case RISCV::VWADD_WV:
661 case RISCV::VWADD_WX:
662 case RISCV::VWSUB_WV:
663 case RISCV::VWSUB_WX:
664 // Vector Widening Floating-Point Add/Subtract Instructions
665 case RISCV::VFWADD_WF:
666 case RISCV::VFWADD_WV:
667 case RISCV::VFWSUB_WF:
668 case RISCV::VFWSUB_WV: {
669 bool IsOp1 = (HasPassthru && !IsTied) ? MO.getOperandNo() == 2
670 : MO.getOperandNo() == 1;
671 bool TwoTimes = IsMODef || IsOp1;
672 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
673 }
674
675 // Vector Integer Extension
676 case RISCV::VZEXT_VF2:
677 case RISCV::VSEXT_VF2:
678 return getIntegerExtensionOperandEEW(2, MI, MO);
679 case RISCV::VZEXT_VF4:
680 case RISCV::VSEXT_VF4:
681 return getIntegerExtensionOperandEEW(4, MI, MO);
682 case RISCV::VZEXT_VF8:
683 case RISCV::VSEXT_VF8:
684 return getIntegerExtensionOperandEEW(8, MI, MO);
685
686 // Vector Narrowing Integer Right Shift Instructions
687 // Destination EEW=SEW. Op1 has EEW=2*SEW. Op2 has EEW=SEW.
688 case RISCV::VNSRL_WX:
689 case RISCV::VNSRL_WI:
690 case RISCV::VNSRL_WV:
691 case RISCV::VNSRA_WI:
692 case RISCV::VNSRA_WV:
693 case RISCV::VNSRA_WX:
694 // Vector Narrowing Fixed-Point Clip Instructions
695 // Destination and Op1 EEW=SEW. Op2 EEW=2*SEW.
696 case RISCV::VNCLIPU_WI:
697 case RISCV::VNCLIPU_WV:
698 case RISCV::VNCLIPU_WX:
699 case RISCV::VNCLIP_WI:
700 case RISCV::VNCLIP_WV:
701 case RISCV::VNCLIP_WX:
702 // Narrowing Floating-Point/Integer Type-Convert Instructions
703 case RISCV::VFNCVT_XU_F_W:
704 case RISCV::VFNCVT_X_F_W:
705 case RISCV::VFNCVT_RTZ_XU_F_W:
706 case RISCV::VFNCVT_RTZ_X_F_W:
707 case RISCV::VFNCVT_F_XU_W:
708 case RISCV::VFNCVT_F_X_W:
709 case RISCV::VFNCVT_F_F_W:
710 case RISCV::VFNCVT_ROD_F_F_W:
711 case RISCV::VFNCVTBF16_F_F_W: {
712 assert(!IsTied);
713 bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
714 bool TwoTimes = IsOp1;
715 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
716 }
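  // For example (illustrative values): for a vnsrl.wv pseudo with SEW=16, the
  // destination and the shift-amount source use EEW=16, while the wide source
  // operand reads EEW=32; which operand index is the wide one depends on
  // whether the pseudo carries a passthru, as computed above.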
717
718 // Vector Mask Instructions
719 // Vector Mask-Register Logical Instructions
720 // vmsbf.m set-before-first mask bit
721 // vmsif.m set-including-first mask bit
722 // vmsof.m set-only-first mask bit
723 // EEW=1
724 // We handle the cases where the operand is a v0 mask operand above the switch,
725 // but these instructions may use non-v0 mask operands and need to be handled
726 // specifically.
727 case RISCV::VMAND_MM:
728 case RISCV::VMNAND_MM:
729 case RISCV::VMANDN_MM:
730 case RISCV::VMXOR_MM:
731 case RISCV::VMOR_MM:
732 case RISCV::VMNOR_MM:
733 case RISCV::VMORN_MM:
734 case RISCV::VMXNOR_MM:
735 case RISCV::VMSBF_M:
736 case RISCV::VMSIF_M:
737 case RISCV::VMSOF_M: {
738 return MILog2SEW;
739 }
740
741 // Vector Compress Instruction
742 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
743 // before this switch.
744 case RISCV::VCOMPRESS_VM:
745 return MO.getOperandNo() == 3 ? 0 : MILog2SEW;
746
747 // Vector Iota Instruction
748 // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
749 // before this switch.
750 case RISCV::VIOTA_M: {
751 if (IsMODef || MO.getOperandNo() == 1)
752 return MILog2SEW;
753 return 0;
754 }
755
756 // Vector Integer Compare Instructions
757 // Dest EEW=1. Source EEW=SEW.
758 case RISCV::VMSEQ_VI:
759 case RISCV::VMSEQ_VV:
760 case RISCV::VMSEQ_VX:
761 case RISCV::VMSNE_VI:
762 case RISCV::VMSNE_VV:
763 case RISCV::VMSNE_VX:
764 case RISCV::VMSLTU_VV:
765 case RISCV::VMSLTU_VX:
766 case RISCV::VMSLT_VV:
767 case RISCV::VMSLT_VX:
768 case RISCV::VMSLEU_VV:
769 case RISCV::VMSLEU_VI:
770 case RISCV::VMSLEU_VX:
771 case RISCV::VMSLE_VV:
772 case RISCV::VMSLE_VI:
773 case RISCV::VMSLE_VX:
774 case RISCV::VMSGTU_VI:
775 case RISCV::VMSGTU_VX:
776 case RISCV::VMSGT_VI:
777 case RISCV::VMSGT_VX:
778 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
779 // Dest EEW=1. Source EEW=SEW. Mask source operand handled above this switch.
780 case RISCV::VMADC_VIM:
781 case RISCV::VMADC_VVM:
782 case RISCV::VMADC_VXM:
783 case RISCV::VMSBC_VVM:
784 case RISCV::VMSBC_VXM:
785 // Dest EEW=1. Source EEW=SEW.
786 case RISCV::VMADC_VV:
787 case RISCV::VMADC_VI:
788 case RISCV::VMADC_VX:
789 case RISCV::VMSBC_VV:
790 case RISCV::VMSBC_VX:
791 // 13.13. Vector Floating-Point Compare Instructions
792 // Dest EEW=1. Source EEW=SEW
793 case RISCV::VMFEQ_VF:
794 case RISCV::VMFEQ_VV:
795 case RISCV::VMFNE_VF:
796 case RISCV::VMFNE_VV:
797 case RISCV::VMFLT_VF:
798 case RISCV::VMFLT_VV:
799 case RISCV::VMFLE_VF:
800 case RISCV::VMFLE_VV:
801 case RISCV::VMFGT_VF:
802 case RISCV::VMFGE_VF: {
803 if (IsMODef)
804 return 0;
805 return MILog2SEW;
806 }
807
808 // Vector Reduction Operations
809 // Vector Single-Width Integer Reduction Instructions
810 case RISCV::VREDAND_VS:
811 case RISCV::VREDMAX_VS:
812 case RISCV::VREDMAXU_VS:
813 case RISCV::VREDMIN_VS:
814 case RISCV::VREDMINU_VS:
815 case RISCV::VREDOR_VS:
816 case RISCV::VREDSUM_VS:
817 case RISCV::VREDXOR_VS:
818 // Vector Single-Width Floating-Point Reduction Instructions
819 case RISCV::VFREDMAX_VS:
820 case RISCV::VFREDMIN_VS:
821 case RISCV::VFREDOSUM_VS:
822 case RISCV::VFREDUSUM_VS: {
823 return MILog2SEW;
824 }
825
826 // Vector Widening Integer Reduction Instructions
827 // The Dest and VS1 read only element 0 of the vector register. Return
828 // EEW=2*SEW for these. VS2 has EEW=SEW and EMUL=LMUL.
829 case RISCV::VWREDSUM_VS:
830 case RISCV::VWREDSUMU_VS:
831 // Vector Widening Floating-Point Reduction Instructions
832 case RISCV::VFWREDOSUM_VS:
833 case RISCV::VFWREDUSUM_VS: {
834 bool TwoTimes = IsMODef || MO.getOperandNo() == 3;
835 return TwoTimes ? MILog2SEW + 1 : MILog2SEW;
836 }
837
838 // Vector Register Gather with 16-bit Index Elements Instruction
839 // Dest and source data EEW=SEW. Index vector EEW=16.
840 case RISCV::VRGATHEREI16_VV: {
841 if (MO.getOperandNo() == 2)
842 return 4;
843 return MILog2SEW;
844 }
845
846 default:
847 return std::nullopt;
848 }
849}
850
851static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
852 const MachineInstr &MI = *MO.getParent();
853 const RISCVVPseudosTable::PseudoInfo *RVV =
854 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
855 assert(RVV && "Could not find MI in PseudoTable");
856
857 std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
858 if (!Log2EEW)
859 return std::nullopt;
860
861 switch (RVV->BaseInstr) {
862 // Vector Reduction Operations
863 // Vector Single-Width Integer Reduction Instructions
864 // Vector Widening Integer Reduction Instructions
865 // Vector Widening Floating-Point Reduction Instructions
866 // The Dest and VS1 only read element 0 of the vector register. Return just
867 // the EEW for these.
868 case RISCV::VREDAND_VS:
869 case RISCV::VREDMAX_VS:
870 case RISCV::VREDMAXU_VS:
871 case RISCV::VREDMIN_VS:
872 case RISCV::VREDMINU_VS:
873 case RISCV::VREDOR_VS:
874 case RISCV::VREDSUM_VS:
875 case RISCV::VREDXOR_VS:
876 case RISCV::VWREDSUM_VS:
877 case RISCV::VWREDSUMU_VS:
878 case RISCV::VFWREDOSUM_VS:
879 case RISCV::VFWREDUSUM_VS:
880 if (MO.getOperandNo() != 2)
881 return OperandInfo(*Log2EEW);
882 break;
883 };
884
885 // All others have EMUL=EEW/SEW*LMUL
886 return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
887}
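// Illustrative example (assumed values): for a PseudoVWADD_VV with SEW=16 and
// LMUL=m1, getOperandLog2EEW reports EEW=32 for the destination and EEW=16
// for the sources, so this returns {EMUL=m2, EEW=32} and {EMUL=m1, EEW=16}
// respectively. For the scalar (vs1) source of a reduction, only the EEW is
// filled in and EMUL stays nullopt.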
888
889static bool isTupleInsertInstr(const MachineInstr &MI);
890
891/// Return true if this optimization should consider MI for VL reduction. This
892/// white-list approach simplifies this optimization for instructions that may
893/// have more complex semantics with respect to how they use VL.
894static bool isSupportedInstr(const MachineInstr &MI) {
895 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
896 return true;
897
898 const RISCVVPseudosTable::PseudoInfo *RVV =
899 RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
900
901 if (!RVV)
902 return false;
903
904 switch (RVV->BaseInstr) {
905 // Vector Unit-Stride Instructions
906 // Vector Strided Instructions
907 case RISCV::VLM_V:
908 case RISCV::VLE8_V:
909 case RISCV::VLSE8_V:
910 case RISCV::VLE16_V:
911 case RISCV::VLSE16_V:
912 case RISCV::VLE32_V:
913 case RISCV::VLSE32_V:
914 case RISCV::VLE64_V:
915 case RISCV::VLSE64_V:
916 // Vector Indexed Instructions
917 case RISCV::VLUXEI8_V:
918 case RISCV::VLOXEI8_V:
919 case RISCV::VLUXEI16_V:
920 case RISCV::VLOXEI16_V:
921 case RISCV::VLUXEI32_V:
922 case RISCV::VLOXEI32_V:
923 case RISCV::VLUXEI64_V:
924 case RISCV::VLOXEI64_V:
925 // Vector Single-Width Integer Add and Subtract
926 case RISCV::VADD_VI:
927 case RISCV::VADD_VV:
928 case RISCV::VADD_VX:
929 case RISCV::VSUB_VV:
930 case RISCV::VSUB_VX:
931 case RISCV::VRSUB_VI:
932 case RISCV::VRSUB_VX:
933 // Vector Bitwise Logical Instructions
934 // Vector Single-Width Shift Instructions
935 case RISCV::VAND_VI:
936 case RISCV::VAND_VV:
937 case RISCV::VAND_VX:
938 case RISCV::VOR_VI:
939 case RISCV::VOR_VV:
940 case RISCV::VOR_VX:
941 case RISCV::VXOR_VI:
942 case RISCV::VXOR_VV:
943 case RISCV::VXOR_VX:
944 case RISCV::VSLL_VI:
945 case RISCV::VSLL_VV:
946 case RISCV::VSLL_VX:
947 case RISCV::VSRL_VI:
948 case RISCV::VSRL_VV:
949 case RISCV::VSRL_VX:
950 case RISCV::VSRA_VI:
951 case RISCV::VSRA_VV:
952 case RISCV::VSRA_VX:
953 // Vector Widening Integer Add/Subtract
954 case RISCV::VWADDU_VV:
955 case RISCV::VWADDU_VX:
956 case RISCV::VWSUBU_VV:
957 case RISCV::VWSUBU_VX:
958 case RISCV::VWADD_VV:
959 case RISCV::VWADD_VX:
960 case RISCV::VWSUB_VV:
961 case RISCV::VWSUB_VX:
962 case RISCV::VWADDU_WV:
963 case RISCV::VWADDU_WX:
964 case RISCV::VWSUBU_WV:
965 case RISCV::VWSUBU_WX:
966 case RISCV::VWADD_WV:
967 case RISCV::VWADD_WX:
968 case RISCV::VWSUB_WV:
969 case RISCV::VWSUB_WX:
970 // Vector Integer Extension
971 case RISCV::VZEXT_VF2:
972 case RISCV::VSEXT_VF2:
973 case RISCV::VZEXT_VF4:
974 case RISCV::VSEXT_VF4:
975 case RISCV::VZEXT_VF8:
976 case RISCV::VSEXT_VF8:
977 // Vector Narrowing Integer Right Shift Instructions
978 case RISCV::VNSRL_WX:
979 case RISCV::VNSRL_WI:
980 case RISCV::VNSRL_WV:
981 case RISCV::VNSRA_WI:
982 case RISCV::VNSRA_WV:
983 case RISCV::VNSRA_WX:
984 // Vector Integer Compare Instructions
985 case RISCV::VMSEQ_VI:
986 case RISCV::VMSEQ_VV:
987 case RISCV::VMSEQ_VX:
988 case RISCV::VMSNE_VI:
989 case RISCV::VMSNE_VV:
990 case RISCV::VMSNE_VX:
991 case RISCV::VMSLTU_VV:
992 case RISCV::VMSLTU_VX:
993 case RISCV::VMSLT_VV:
994 case RISCV::VMSLT_VX:
995 case RISCV::VMSLEU_VV:
996 case RISCV::VMSLEU_VI:
997 case RISCV::VMSLEU_VX:
998 case RISCV::VMSLE_VV:
999 case RISCV::VMSLE_VI:
1000 case RISCV::VMSLE_VX:
1001 case RISCV::VMSGTU_VI:
1002 case RISCV::VMSGTU_VX:
1003 case RISCV::VMSGT_VI:
1004 case RISCV::VMSGT_VX:
1005 // Vector Integer Min/Max Instructions
1006 case RISCV::VMINU_VV:
1007 case RISCV::VMINU_VX:
1008 case RISCV::VMIN_VV:
1009 case RISCV::VMIN_VX:
1010 case RISCV::VMAXU_VV:
1011 case RISCV::VMAXU_VX:
1012 case RISCV::VMAX_VV:
1013 case RISCV::VMAX_VX:
1014 // Vector Single-Width Integer Multiply Instructions
1015 case RISCV::VMUL_VV:
1016 case RISCV::VMUL_VX:
1017 case RISCV::VMULH_VV:
1018 case RISCV::VMULH_VX:
1019 case RISCV::VMULHU_VV:
1020 case RISCV::VMULHU_VX:
1021 case RISCV::VMULHSU_VV:
1022 case RISCV::VMULHSU_VX:
1023 // Vector Integer Divide Instructions
1024 case RISCV::VDIVU_VV:
1025 case RISCV::VDIVU_VX:
1026 case RISCV::VDIV_VV:
1027 case RISCV::VDIV_VX:
1028 case RISCV::VREMU_VV:
1029 case RISCV::VREMU_VX:
1030 case RISCV::VREM_VV:
1031 case RISCV::VREM_VX:
1032 // Vector Widening Integer Multiply Instructions
1033 case RISCV::VWMUL_VV:
1034 case RISCV::VWMUL_VX:
1035 case RISCV::VWMULSU_VV:
1036 case RISCV::VWMULSU_VX:
1037 case RISCV::VWMULU_VV:
1038 case RISCV::VWMULU_VX:
1039 // Vector Single-Width Integer Multiply-Add Instructions
1040 case RISCV::VMACC_VV:
1041 case RISCV::VMACC_VX:
1042 case RISCV::VNMSAC_VV:
1043 case RISCV::VNMSAC_VX:
1044 case RISCV::VMADD_VV:
1045 case RISCV::VMADD_VX:
1046 case RISCV::VNMSUB_VV:
1047 case RISCV::VNMSUB_VX:
1048 // Vector Integer Merge Instructions
1049 case RISCV::VMERGE_VIM:
1050 case RISCV::VMERGE_VVM:
1051 case RISCV::VMERGE_VXM:
1052 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
1053 case RISCV::VADC_VIM:
1054 case RISCV::VADC_VVM:
1055 case RISCV::VADC_VXM:
1056 case RISCV::VMADC_VIM:
1057 case RISCV::VMADC_VVM:
1058 case RISCV::VMADC_VXM:
1059 case RISCV::VSBC_VVM:
1060 case RISCV::VSBC_VXM:
1061 case RISCV::VMSBC_VVM:
1062 case RISCV::VMSBC_VXM:
1063 case RISCV::VMADC_VV:
1064 case RISCV::VMADC_VI:
1065 case RISCV::VMADC_VX:
1066 case RISCV::VMSBC_VV:
1067 case RISCV::VMSBC_VX:
1068 // Vector Widening Integer Multiply-Add Instructions
1069 case RISCV::VWMACCU_VV:
1070 case RISCV::VWMACCU_VX:
1071 case RISCV::VWMACC_VV:
1072 case RISCV::VWMACC_VX:
1073 case RISCV::VWMACCSU_VV:
1074 case RISCV::VWMACCSU_VX:
1075 case RISCV::VWMACCUS_VX:
1076 // Vector Integer Move Instructions
1077 case RISCV::VMV_V_I:
1078 case RISCV::VMV_V_X:
1079 case RISCV::VMV_V_V:
1080 // Vector Single-Width Saturating Add and Subtract
1081 case RISCV::VSADDU_VV:
1082 case RISCV::VSADDU_VX:
1083 case RISCV::VSADDU_VI:
1084 case RISCV::VSADD_VV:
1085 case RISCV::VSADD_VX:
1086 case RISCV::VSADD_VI:
1087 case RISCV::VSSUBU_VV:
1088 case RISCV::VSSUBU_VX:
1089 case RISCV::VSSUB_VV:
1090 case RISCV::VSSUB_VX:
1091 // Vector Single-Width Averaging Add and Subtract
1092 case RISCV::VAADDU_VV:
1093 case RISCV::VAADDU_VX:
1094 case RISCV::VAADD_VV:
1095 case RISCV::VAADD_VX:
1096 case RISCV::VASUBU_VV:
1097 case RISCV::VASUBU_VX:
1098 case RISCV::VASUB_VV:
1099 case RISCV::VASUB_VX:
1100 // Vector Single-Width Fractional Multiply with Rounding and Saturation
1101 case RISCV::VSMUL_VV:
1102 case RISCV::VSMUL_VX:
1103 // Vector Single-Width Scaling Shift Instructions
1104 case RISCV::VSSRL_VV:
1105 case RISCV::VSSRL_VX:
1106 case RISCV::VSSRL_VI:
1107 case RISCV::VSSRA_VV:
1108 case RISCV::VSSRA_VX:
1109 case RISCV::VSSRA_VI:
1110 // Vector Narrowing Fixed-Point Clip Instructions
1111 case RISCV::VNCLIPU_WV:
1112 case RISCV::VNCLIPU_WX:
1113 case RISCV::VNCLIPU_WI:
1114 case RISCV::VNCLIP_WV:
1115 case RISCV::VNCLIP_WX:
1116 case RISCV::VNCLIP_WI:
1117 // Vector Bit-manipulation Instructions (Zvbb)
1118 // Vector And-Not
1119 case RISCV::VANDN_VV:
1120 case RISCV::VANDN_VX:
1121 // Vector Reverse Bits in Elements
1122 case RISCV::VBREV_V:
1123 // Vector Reverse Bits in Bytes
1124 case RISCV::VBREV8_V:
1125 // Vector Reverse Bytes
1126 case RISCV::VREV8_V:
1127 // Vector Count Leading Zeros
1128 case RISCV::VCLZ_V:
1129 // Vector Count Trailing Zeros
1130 case RISCV::VCTZ_V:
1131 // Vector Population Count
1132 case RISCV::VCPOP_V:
1133 // Vector Rotate Left
1134 case RISCV::VROL_VV:
1135 case RISCV::VROL_VX:
1136 // Vector Rotate Right
1137 case RISCV::VROR_VI:
1138 case RISCV::VROR_VV:
1139 case RISCV::VROR_VX:
1140 // Vector Widening Shift Left Logical
1141 case RISCV::VWSLL_VI:
1142 case RISCV::VWSLL_VX:
1143 case RISCV::VWSLL_VV:
1144 // Vector Carry-less Multiplication Instructions (Zvbc)
1145 // Vector Carry-less Multiply
1146 case RISCV::VCLMUL_VV:
1147 case RISCV::VCLMUL_VX:
1148 // Vector Carry-less Multiply Return High Half
1149 case RISCV::VCLMULH_VV:
1150 case RISCV::VCLMULH_VX:
1151 // Vector Mask Instructions
1152 // Vector Mask-Register Logical Instructions
1153 // vmsbf.m set-before-first mask bit
1154 // vmsif.m set-including-first mask bit
1155 // vmsof.m set-only-first mask bit
1156 // Vector Iota Instruction
1157 // Vector Element Index Instruction
1158 case RISCV::VMAND_MM:
1159 case RISCV::VMNAND_MM:
1160 case RISCV::VMANDN_MM:
1161 case RISCV::VMXOR_MM:
1162 case RISCV::VMOR_MM:
1163 case RISCV::VMNOR_MM:
1164 case RISCV::VMORN_MM:
1165 case RISCV::VMXNOR_MM:
1166 case RISCV::VMSBF_M:
1167 case RISCV::VMSIF_M:
1168 case RISCV::VMSOF_M:
1169 case RISCV::VIOTA_M:
1170 case RISCV::VID_V:
1171 // Vector Slide Instructions
1172 case RISCV::VSLIDEUP_VX:
1173 case RISCV::VSLIDEUP_VI:
1174 case RISCV::VSLIDEDOWN_VX:
1175 case RISCV::VSLIDEDOWN_VI:
1176 case RISCV::VSLIDE1UP_VX:
1177 case RISCV::VFSLIDE1UP_VF:
1178 // Vector Register Gather Instructions
1179 case RISCV::VRGATHER_VI:
1180 case RISCV::VRGATHER_VV:
1181 case RISCV::VRGATHER_VX:
1182 case RISCV::VRGATHEREI16_VV:
1183 // Vector Single-Width Floating-Point Add/Subtract Instructions
1184 case RISCV::VFADD_VF:
1185 case RISCV::VFADD_VV:
1186 case RISCV::VFSUB_VF:
1187 case RISCV::VFSUB_VV:
1188 case RISCV::VFRSUB_VF:
1189 // Vector Widening Floating-Point Add/Subtract Instructions
1190 case RISCV::VFWADD_VV:
1191 case RISCV::VFWADD_VF:
1192 case RISCV::VFWSUB_VV:
1193 case RISCV::VFWSUB_VF:
1194 case RISCV::VFWADD_WF:
1195 case RISCV::VFWADD_WV:
1196 case RISCV::VFWSUB_WF:
1197 case RISCV::VFWSUB_WV:
1198 // Vector Single-Width Floating-Point Multiply/Divide Instructions
1199 case RISCV::VFMUL_VF:
1200 case RISCV::VFMUL_VV:
1201 case RISCV::VFDIV_VF:
1202 case RISCV::VFDIV_VV:
1203 case RISCV::VFRDIV_VF:
1204 // Vector Widening Floating-Point Multiply
1205 case RISCV::VFWMUL_VF:
1206 case RISCV::VFWMUL_VV:
1207 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
1208 case RISCV::VFMACC_VV:
1209 case RISCV::VFMACC_VF:
1210 case RISCV::VFNMACC_VV:
1211 case RISCV::VFNMACC_VF:
1212 case RISCV::VFMSAC_VV:
1213 case RISCV::VFMSAC_VF:
1214 case RISCV::VFNMSAC_VV:
1215 case RISCV::VFNMSAC_VF:
1216 case RISCV::VFMADD_VV:
1217 case RISCV::VFMADD_VF:
1218 case RISCV::VFNMADD_VV:
1219 case RISCV::VFNMADD_VF:
1220 case RISCV::VFMSUB_VV:
1221 case RISCV::VFMSUB_VF:
1222 case RISCV::VFNMSUB_VV:
1223 case RISCV::VFNMSUB_VF:
1224 // Vector Widening Floating-Point Fused Multiply-Add Instructions
1225 case RISCV::VFWMACC_VV:
1226 case RISCV::VFWMACC_VF:
1227 case RISCV::VFWNMACC_VV:
1228 case RISCV::VFWNMACC_VF:
1229 case RISCV::VFWMSAC_VV:
1230 case RISCV::VFWMSAC_VF:
1231 case RISCV::VFWNMSAC_VV:
1232 case RISCV::VFWNMSAC_VF:
1233 case RISCV::VFWMACCBF16_VV:
1234 case RISCV::VFWMACCBF16_VF:
1235 // Vector Floating-Point Square-Root Instruction
1236 case RISCV::VFSQRT_V:
1237 // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
1238 case RISCV::VFRSQRT7_V:
1239 // Vector Floating-Point Reciprocal Estimate Instruction
1240 case RISCV::VFREC7_V:
1241 // Vector Floating-Point MIN/MAX Instructions
1242 case RISCV::VFMIN_VF:
1243 case RISCV::VFMIN_VV:
1244 case RISCV::VFMAX_VF:
1245 case RISCV::VFMAX_VV:
1246 // Vector Floating-Point Sign-Injection Instructions
1247 case RISCV::VFSGNJ_VF:
1248 case RISCV::VFSGNJ_VV:
1249 case RISCV::VFSGNJN_VV:
1250 case RISCV::VFSGNJN_VF:
1251 case RISCV::VFSGNJX_VF:
1252 case RISCV::VFSGNJX_VV:
1253 // Vector Floating-Point Compare Instructions
1254 case RISCV::VMFEQ_VF:
1255 case RISCV::VMFEQ_VV:
1256 case RISCV::VMFNE_VF:
1257 case RISCV::VMFNE_VV:
1258 case RISCV::VMFLT_VF:
1259 case RISCV::VMFLT_VV:
1260 case RISCV::VMFLE_VF:
1261 case RISCV::VMFLE_VV:
1262 case RISCV::VMFGT_VF:
1263 case RISCV::VMFGE_VF:
1264 // Vector Floating-Point Classify Instruction
1265 case RISCV::VFCLASS_V:
1266 // Vector Floating-Point Merge Instruction
1267 case RISCV::VFMERGE_VFM:
1268 // Vector Floating-Point Move Instruction
1269 case RISCV::VFMV_V_F:
1270 // Single-Width Floating-Point/Integer Type-Convert Instructions
1271 case RISCV::VFCVT_XU_F_V:
1272 case RISCV::VFCVT_X_F_V:
1273 case RISCV::VFCVT_RTZ_XU_F_V:
1274 case RISCV::VFCVT_RTZ_X_F_V:
1275 case RISCV::VFCVT_F_XU_V:
1276 case RISCV::VFCVT_F_X_V:
1277 // Widening Floating-Point/Integer Type-Convert Instructions
1278 case RISCV::VFWCVT_XU_F_V:
1279 case RISCV::VFWCVT_X_F_V:
1280 case RISCV::VFWCVT_RTZ_XU_F_V:
1281 case RISCV::VFWCVT_RTZ_X_F_V:
1282 case RISCV::VFWCVT_F_XU_V:
1283 case RISCV::VFWCVT_F_X_V:
1284 case RISCV::VFWCVT_F_F_V:
1285 case RISCV::VFWCVTBF16_F_F_V:
1286 // Narrowing Floating-Point/Integer Type-Convert Instructions
1287 case RISCV::VFNCVT_XU_F_W:
1288 case RISCV::VFNCVT_X_F_W:
1289 case RISCV::VFNCVT_RTZ_XU_F_W:
1290 case RISCV::VFNCVT_RTZ_X_F_W:
1291 case RISCV::VFNCVT_F_XU_W:
1292 case RISCV::VFNCVT_F_X_W:
1293 case RISCV::VFNCVT_F_F_W:
1294 case RISCV::VFNCVT_ROD_F_F_W:
1295 case RISCV::VFNCVTBF16_F_F_W:
1296 return true;
1297 }
1298
1299 return false;
1300}
1301
1302/// Return true if MO is a vector operand but is used as a scalar operand.
1303static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO) {
1304 const MachineInstr *MI = MO.getParent();
1305 const RISCVVPseudosTable::PseudoInfo *RVV =
1306 RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
1307
1308 if (!RVV)
1309 return false;
1310
1311 switch (RVV->BaseInstr) {
1312 // Reductions only use vs1[0] of vs1
1313 case RISCV::VREDAND_VS:
1314 case RISCV::VREDMAX_VS:
1315 case RISCV::VREDMAXU_VS:
1316 case RISCV::VREDMIN_VS:
1317 case RISCV::VREDMINU_VS:
1318 case RISCV::VREDOR_VS:
1319 case RISCV::VREDSUM_VS:
1320 case RISCV::VREDXOR_VS:
1321 case RISCV::VWREDSUM_VS:
1322 case RISCV::VWREDSUMU_VS:
1323 case RISCV::VFREDMAX_VS:
1324 case RISCV::VFREDMIN_VS:
1325 case RISCV::VFREDOSUM_VS:
1326 case RISCV::VFREDUSUM_VS:
1327 case RISCV::VFWREDOSUM_VS:
1328 case RISCV::VFWREDUSUM_VS:
1329 return MO.getOperandNo() == 3;
1330 case RISCV::VMV_X_S:
1331 case RISCV::VFMV_F_S:
1332 return MO.getOperandNo() == 1;
1333 default:
1334 return false;
1335 }
1336}
1337
1338bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
1339 const MCInstrDesc &Desc = MI.getDesc();
1340 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
1341 return false;
1342
1343 if (MI.getNumExplicitDefs() != 1)
1344 return false;
1345
1346 // Some instructions have implicit defs e.g. $vxsat. If they might be read
1347 // later then we can't reduce VL.
1348 if (!MI.allImplicitDefsAreDead()) {
1349 LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n");
1350 return false;
1351 }
1352
1353 if (MI.mayRaiseFPException()) {
1354 LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n");
1355 return false;
1356 }
1357
1358 for (const MachineMemOperand *MMO : MI.memoperands()) {
1359 if (MMO->isVolatile()) {
1360 LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n");
1361 return false;
1362 }
1363 }
1364
1365 // Some instructions that produce vectors have semantics that make it more
1366 // difficult to determine whether the VL can be reduced. For example, some
1367 // instructions, such as reductions, may write lanes past VL to a scalar
1368 // register. Other instructions, such as some loads or stores, may write
1369 // lower lanes using data from higher lanes. There may be other complex
1370 // semantics not mentioned here that make it hard to determine whether
1371 // the VL can be optimized. As a result, a white-list of supported
1372 // instructions is used. Over time, more instructions can be supported
1373 // upon careful examination of their semantics under the logic in this
1374 // optimization.
1375 // TODO: Use a better approach than a white-list, such as adding
1376 // properties to instructions using something like TSFlags.
1377 if (!isSupportedInstr(MI)) {
1378 LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
1379 << MI);
1380 return false;
1381 }
1382
1383 assert(!RISCVII::elementsDependOnVL(
1384 TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
1385 "Instruction shouldn't be supported if elements depend on VL");
1386
1387 assert(RISCVRI::isVRegClass(
1388 MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) &&
1389 "All supported instructions produce a vector register result");
1390
1391 LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n");
1392 return true;
1393}
1394
1395DemandedVL
1396RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
1397 const MachineInstr &UserMI = *UserOp.getParent();
1398 const MCInstrDesc &Desc = UserMI.getDesc();
1399
1400 if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
1401 return DemandedVLs.lookup(&UserMI);
1402
1403 if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1404 LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1405 " it uses VLMAX\n");
1406 return DemandedVL::vlmax();
1407 }
1408
1409 if (RISCVII::readsPastVL(
1410 TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
1411 LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
1412 return DemandedVL::vlmax();
1413 }
1414
1415 unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1416 const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1417 // Looking for an immediate or a register VL that isn't X0.
1418 assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1419 "Did not expect X0 VL");
1420
1421 // If the user is a passthru it will read the elements past VL, so
1422 // abort if any of the elements past VL are demanded.
1423 if (UserOp.isTied()) {
1424 assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
1425 RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
1426 if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
1427 LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1428 "instruction with demanded tail\n");
1429 return DemandedVL::vlmax();
1430 }
1431 }
1432
1433 // Instructions like reductions may use a vector register as a scalar
1434 // register. In this case, we should treat it as only reading the first lane.
1435 if (isVectorOpUsedAsScalarOp(UserOp)) {
1436 LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1437 return MachineOperand::CreateImm(1);
1438 }
1439
1440 // If we know the demanded VL of UserMI, then we can reduce the VL it
1441 // requires.
1442 if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
1443 return DemandedVLs.lookup(&UserMI);
1444
1445 return VLOp;
1446}
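// Illustrative example (assumed operands): if the user is a PseudoVADD_VV
// with VL operand 8 but the analysis has already recorded that only 2 of its
// elements are demanded downstream, this returns the demanded VL of 2. If the
// operand is the user's tied passthru and the user's demanded VL is not known
// to be <= its VL operand, the tail is live and VLMAX is returned instead.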
1447
1448/// Return true if MI is an instruction used for assembling registers
1449/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
1450/// Currently it's lowered to INSERT_SUBREG.
1451static bool isTupleInsertInstr(const MachineInstr &MI) {
1452 if (!MI.isInsertSubreg())
1453 return false;
1454
1455 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1456 const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
1457 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1458 if (!RISCVRI::isVRegClass(DstRC->TSFlags))
1459 return false;
1460 unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
1461 if (NF < 2)
1462 return false;
1463
1464 // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
1465 auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
1466 unsigned SubRegIdx = MI.getOperand(3).getImm();
1467 [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
1468 assert(!IsFractional && "unexpected LMUL for tuple register classes");
1469 return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
1470}
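// Illustrative example (assumed register classes): inserting an LMUL=2 value
// into a 2-field tuple class (NF=2, LMUL=m2) uses a subregister index that
// covers RVVBitsPerBlock * 2 bits, so the size check above accepts it; an
// INSERT_SUBREG whose index covers only part of one field is rejected.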
1471
1472static bool isSegmentedStoreInstr(const MachineInstr &MI) {
1473 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
1474 case VSSEG_CASES(8):
1475 case VSSSEG_CASES(8):
1476 case VSUXSEG_CASES(8):
1477 case VSOXSEG_CASES(8):
1478 case VSSEG_CASES(16):
1479 case VSSSEG_CASES(16):
1480 case VSUXSEG_CASES(16):
1481 case VSOXSEG_CASES(16):
1482 case VSSEG_CASES(32):
1483 case VSSSEG_CASES(32):
1484 case VSUXSEG_CASES(32):
1485 case VSOXSEG_CASES(32):
1486 case VSSEG_CASES(64):
1487 case VSSSEG_CASES(64):
1488 case VSUXSEG_CASES(64):
1489 case VSOXSEG_CASES(64):
1490 return true;
1491 default:
1492 return false;
1493 }
1494}
1495
1496bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
1497 if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
1498 return true;
1499
1500 SmallSetVector<MachineOperand *, 8> OpWorklist;
1501 SmallPtrSet<const MachineInstr *, 4> PHISeen;
1502 for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
1503 OpWorklist.insert(&UserOp);
1504
1505 while (!OpWorklist.empty()) {
1506 MachineOperand &UserOp = *OpWorklist.pop_back_val();
1507 const MachineInstr &UserMI = *UserOp.getParent();
1508 LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1509
1510 if (UserMI.isFullCopy() && UserMI.getOperand(0).getReg().isVirtual()) {
1511 LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
1512 OpWorklist.insert_range(llvm::make_pointer_range(
1513 MRI->use_operands(UserMI.getOperand(0).getReg())));
1514 continue;
1515 }
1516
1517 if (isTupleInsertInstr(UserMI)) {
1518 LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
1519 for (MachineOperand &UseOp :
1520 MRI->use_operands(UserMI.getOperand(0).getReg())) {
1521 const MachineInstr &CandidateMI = *UseOp.getParent();
1522 // We should not propagate the VL if the user is not a segmented store
1523 // or another INSERT_SUBREG, since VL just works differently
1524 // between segmented operations (per-field) vs. other RVV ops (on the
1525 // whole register group).
1526 if (!isTupleInsertInstr(CandidateMI) &&
1527 !isSegmentedStoreInstr(CandidateMI))
1528 return false;
1529 OpWorklist.insert(&UseOp);
1530 }
1531 continue;
1532 }
1533
1534 if (UserMI.isPHI()) {
1535 // Don't follow PHI cycles
1536 if (!PHISeen.insert(&UserMI).second)
1537 continue;
1538 LLVM_DEBUG(dbgs() << " Peeking through uses of PHI\n");
1539 OpWorklist.insert_range(llvm::make_pointer_range(
1540 MRI->use_operands(UserMI.getOperand(0).getReg())));
1541 continue;
1542 }
1543
1544 if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1545 LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1546 return false;
1547 }
1548
1549 std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
1550 std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0));
1551 if (!ConsumerInfo || !ProducerInfo) {
1552 LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1553 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1554 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1555 return false;
1556 }
1557
1558 if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
1559 LLVM_DEBUG(
1560 dbgs()
1561 << " Abort due to incompatible information for EMUL or EEW.\n");
1562 LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1563 LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1564 return false;
1565 }
1566 }
1567
1568 return true;
1569}
1570
1571bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
1572 LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
1573
1574 unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
1575 MachineOperand &VLOp = MI.getOperand(VLOpNum);
1576
1577 // If the VL is 1, then there is no need to reduce it. This is an
1578 // optimization, not needed to preserve correctness.
1579 if (VLOp.isImm() && VLOp.getImm() == 1) {
1580 LLVM_DEBUG(dbgs() << " Abort due to VL == 1, no point in reducing.\n");
1581 return false;
1582 }
1583
1584 auto *CommonVL = &DemandedVLs.at(&MI).VL;
1585
1586 assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
1587 "Expected VL to be an Imm or virtual Reg");
1588
1589 // If the VL is defined by a vleff that doesn't dominate MI, try using the
1590 // vleff's AVL. It will be greater than or equal to the output VL.
1591 if (CommonVL->isReg()) {
1592 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
1593 if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
1594 !MDT->dominates(VLMI, &MI))
1595 CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
1596 }
1597
1598 if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
1599 LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
1600 return false;
1601 }
1602
1603 if (CommonVL->isIdenticalTo(VLOp)) {
1604 LLVM_DEBUG(
1605 dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
1606 return false;
1607 }
1608
1609 if (CommonVL->isImm()) {
1610 LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1611 << CommonVL->getImm() << " for " << MI << "\n");
1612 VLOp.ChangeToImmediate(CommonVL->getImm());
1613 return true;
1614 }
1615 const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
1616 if (!MDT->dominates(VLMI, &MI)) {
1617 LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
1618 return false;
1619 }
1620 LLVM_DEBUG(
1621 dbgs() << " Reduce VL from " << VLOp << " to "
1622 << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
1623 << " for " << MI << "\n");
1624
1625 // All our checks passed. We can reduce VL.
1626 VLOp.ChangeToRegister(CommonVL->getReg(), false);
1627 return true;
1628}
1629
1630static bool isPhysical(const MachineOperand &MO) {
1631 return MO.isReg() && MO.getReg().isPhysical();
1632}
1633
1634/// Look through \p MI's operands and propagate what it demands to its uses.
1635void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
1636 if (!isSupportedInstr(MI) || !checkUsers(MI) || any_of(MI.defs(), isPhysical))
1637 DemandedVLs[&MI] = DemandedVL::vlmax();
1638
1639 for (const MachineOperand &MO : virtual_vec_uses(MI)) {
1640 const MachineInstr *Def = MRI->getVRegDef(MO.getReg());
1641 DemandedVL Prev = DemandedVLs[Def];
1642 DemandedVLs[Def] = DemandedVLs[Def].max(getMinimumVLForUser(MO));
1643 if (DemandedVLs[Def] != Prev)
1644 Worklist.insert(Def);
1645 }
1646}
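// Illustrative trace (schematic operands): when transfer() visits a store
// such as PseudoVSE32_V_M1 %a, %p, 2, 5, getMinimumVLForUser() for the %a use
// is the store's VL of 2, so the demand recorded for %a's defining
// instruction is joined with imm 2 and, if that changed it, the def is pushed
// back onto the worklist so its own vector sources get revisited.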
1647
1648bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
1649 if (skipFunction(MF.getFunction()))
1650 return false;
1651
1652 MRI = &MF.getRegInfo();
1653 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1654
1655 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
1656 if (!ST.hasVInstructions())
1657 return false;
1658
1659 TII = ST.getInstrInfo();
1660
1661 assert(DemandedVLs.empty());
1662
1663 // For each instruction that defines a vector, propagate the VL it
1664 // uses to its inputs.
1665 for (MachineBasicBlock *MBB : post_order(&MF)) {
1667 for (MachineInstr &MI : reverse(*MBB))
1668 if (!MI.isDebugInstr())
1669 Worklist.insert(&MI);
1670 }
1671
1672 while (!Worklist.empty()) {
1673 const MachineInstr *MI = Worklist.front();
1674 Worklist.remove(MI);
1675 transfer(*MI);
1676 }
1677
1678 // Then go through and see if we can reduce the VL of any instructions to
1679 // only what's demanded.
1680 bool MadeChange = false;
1681 for (MachineBasicBlock &MBB : MF) {
1682 // Avoid unreachable blocks as they have degenerate dominance
1683 if (!MDT->isReachableFromEntry(&MBB))
1684 continue;
1685
1686 for (auto &MI : reverse(MBB)) {
1687 if (!isCandidate(MI))
1688 continue;
1689 if (!tryReduceVL(MI))
1690 continue;
1691 MadeChange = true;
1692 }
1693 }
1694
1695 DemandedVLs.clear();
1696 return MadeChange;
1697}