//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
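// For example (an illustrative sketch only; pseudo operand order simplified),
// a vector add pseudo carrying AVL=4 and SEW=e32 operands:
//
//   %v = PseudoVADD_VV_M1 %pt, %a, %b, 4, /*sew*/ 5, /*policy*/ 3
//
// becomes, after this pass:
//
//   dead $x0 = PseudoVSETIVLI 4, /*e32,m1,ta,ma*/ 208,
//              implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 %pt, %a, %b, $noreg, 5, 3,
//              implicit $vl, implicit $vtype
//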
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
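
// For example, in the sequence below the second vsetvli uses the x0, x0 form,
// so it updates only VTYPE and leaves the current VL intact:
//
//   vsetvli a1, a0, e32, m1, ta, ma    ; sets VL and VTYPE from AVL in a0
//   vsetvli x0, x0, e16, mf2, ta, ma   ; VTYPE changed, VL preserved
//
// This pass only emits that form when the new VTYPE yields the same VLMAX
// (same SEW/LMUL ratio), so the preserved VL remains valid.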

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (UseMO.isUndef())
    return true;
  if (UseMO.getReg().isPhysical())
    return false;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1, // SEW can be changed as long as it's greater than or equal to the
           // original value, but must be less than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
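  // Example (illustrative): vmv.s.x writes only element 0, so when its merge
  // operand is undefined it demands SEWGreaterThanOrEqual rather than
  // SEWEqual. Running it under e64 when e32 was requested writes the same low
  // 32 bits of element 0 and only clobbers lanes that were already undefined.
  // The AndLessThan64 variant is used for FP scalar moves/splats on targets
  // without f64 support (see getDemanded below).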
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL |= B.LMUL;
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }
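
  // Example (illustrative): vle32.v encodes EEW=32 in its opcode, and its
  // effective EMUL is (EEW/SEW)*LMUL. Executing it under {e32, m1} or under
  // {e16, mf2} therefore gives the same EEW=32/EMUL=1 operation, because both
  // states keep the SEW/LMUL ratio at 32. That is exactly why only the ratio
  // is demanded above.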

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    const MachineInstr *DefMI;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    AVLIsIgnored,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
    assert(DefMI && AVLReg.isVirtual());
    AVLRegDef.DefMI = DefMI;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  void setAVLIgnored() { State = AVLIsIgnored; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  bool hasAVLIgnored() const { return State == AVLIsIgnored; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const MachineInstr &getAVLDefMI() const {
    assert(hasAVLReg() && AVLRegDef.DefMI);
    return *AVLRegDef.DefMI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else if (Info.hasAVLIgnored())
      setAVLIgnored();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return isNonZeroLoadImmediate(getAVLDefMI());
    if (hasAVLVLMAX())
      return true;
    if (hasAVLIgnored())
      return false;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
             AVLRegDef.DefReg == Other.AVLRegDef.DefReg;

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    if (hasAVLIgnored())
      return Other.hasAVLIgnored();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)getAVLReg();
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    if (hasAVLIgnored())
      OS << "AVLIgnored";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else
      NewInfo.setAVLRegDef(MRI.getVRegDef(AVLReg), AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
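
// Worked example: with VLEN=128, SEW=32, and LMUL=2, this returns
// (128 * 2) / 32 = 8 lanes; with fractional LMUL=1/2 it would instead be
// (128 / 2) / 32 = 2.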

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLRegDef(MRI->getVRegDef(VLOp.getReg()), VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // TODO: If we are more clever about x0,x0 insertion then we should be able
    // to deduce that the VL is ignored based off of DemandedFields, and remove
    // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
    InstrInfo.setAVLIgnored();
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg()) {
    const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      const MachineInstr &DefMI = Info.getAVLDefMI();
      if (isVectorConfigInstr(DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
              .addReg(RISCV::X0, RegState::Define | RegState::Dead)
              .addReg(RISCV::X0, RegState::Kill)
              .addImm(Info.encodeVTYPE())
              .addReg(RISCV::VL, RegState::Implicit);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLIgnored()) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
      .addReg(RISCV::X0, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
    const MachineInstr &DefMI = Require.getAVLDefMI();
    if (isVectorConfigInstr(DefMI)) {
      VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
        return false;
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
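
// Worked example (illustrative): if the previous state is {e64, m2} (ratio
// 64/2 = 32) and the incoming instruction needs e32 but demands neither LMUL
// nor the ratio, adjustIncoming picks LMUL=m1 so the ratio stays 32, letting
// a VL-preserving "vsetvli x0, x0" be used instead of a full VL toggle.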

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI, *MRI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLRegDef(MRI->getVRegDef(MI.getOperand(1).getReg()),
                      MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
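//
// Illustrative CFG shape this catches (sketch; operand details elided):
//
//   bb.preheader:
//     %vl0 = PseudoVSETVLI %avl, <vtype>
//   bb.loop:
//     %vl = PHI [ %vl0, %bb.preheader ], [ %vl1, %bb.loop ]
//     ... vector ops using %vl as AVL ...
//     %vl1 = PseudoVSETVLI %avl2, <vtype>
//
// If every incoming value is the VL output of a vsetvli whose state matches
// the corresponding predecessor's exit state, no vsetvli is needed at the
// top of bb.loop.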
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  const MachineInstr *PHI = &Require.getAVLDefMI();
  if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
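///
/// Illustrative shape (sketch): for a single block loop such as
///
///   bb.preheader:                         ; state unavailable here
///   bb.loop:
///     vsetivli zero, 4, e32, m1, ta, ma   ; same state every iteration
///     ... loop body, no other VL/VTYPE change ...
///     bnez a0, %bb.loop
///
/// the vsetivli can be hoisted into bb.preheader so it executes once, since
/// the back edge from bb.loop already makes the state available.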
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // If the AVL isn't used in its predecessors then bail, since we have no AVL
  // to insert a vsetvli with.
  if (AvailableInfo.hasAVLIgnored())
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI, MRI)
               .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
1583
1584bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
1585 MachineInstr *NextMI = nullptr;
1586 // We can have arbitrary code in successors, so VL and VTYPE
1587 // must be considered demanded.
1588 DemandedFields Used;
1589 Used.demandVL();
1590 Used.demandVTYPE();
1592 for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1593
1594 if (!isVectorConfigInstr(MI)) {
1595 Used.doUnion(getDemanded(MI, MRI, ST));
1596 if (MI.isCall() || MI.isInlineAsm() ||
1597 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1598 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1599 NextMI = nullptr;
1600 continue;
1601 }
1602
1603 Register RegDef = MI.getOperand(0).getReg();
1604 assert(RegDef == RISCV::X0 || RegDef.isVirtual());
1605 if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
1606 Used.demandVL();
1607
1608 if (NextMI) {
1609 if (!Used.usedVL() && !Used.usedVTYPE()) {
1610 ToDelete.push_back(&MI);
1611 // Leave NextMI unchanged
1612 continue;
1613 }
1614
1615 if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
1616 if (!isVLPreservingConfig(*NextMI)) {
1617 Register DefReg = NextMI->getOperand(0).getReg();
1618
1619 MI.getOperand(0).setReg(DefReg);
1620 MI.getOperand(0).setIsDead(false);
1621
1622 // The def of DefReg moved to MI, so extend the LiveInterval up to
1623 // it.
1624 if (DefReg.isVirtual()) {
1625 LiveInterval &DefLI = LIS->getInterval(DefReg);
1626 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1627 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1628 LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1629 DefLI.addSegment(S);
1630 DefVNI->def = MISlot;
1631 // Mark DefLI as spillable if it was previously unspillable
1632 DefLI.setWeight(0);
1633
1634 // DefReg may have had no uses, in which case we need to shrink
1635 // the LiveInterval up to MI.
1636 LIS->shrinkToUses(&DefLI);
1637 }
1638
1639 Register OldVLReg;
1640 if (MI.getOperand(1).isReg())
1641 OldVLReg = MI.getOperand(1).getReg();
1642 if (NextMI->getOperand(1).isImm())
1643 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1644 else
1645 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1646
1647 // Clear NextMI's AVL early so we're not counting it as a use.
1648 if (NextMI->getOperand(1).isReg())
1649 NextMI->getOperand(1).setReg(RISCV::NoRegister);
1650
1651 if (OldVLReg && OldVLReg.isVirtual()) {
1652 // NextMI no longer uses OldVLReg so shrink its LiveInterval.
1653 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1654
1655 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1656 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1657 MRI->use_nodbg_empty(OldVLReg)) {
1658 VLOpDef->eraseFromParent();
1659 LIS->removeInterval(OldVLReg);
1660 }
1661 }
1662 MI.setDesc(NextMI->getDesc());
1663 }
1664 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1665 ToDelete.push_back(NextMI);
1666 // fallthrough
1667 }
1668 }
1669 NextMI = &MI;
1670 Used = getDemanded(MI, MRI, ST);
1671 }
1672
1673 NumCoalescedVSETVL += ToDelete.size();
1674 for (auto *MI : ToDelete) {
1675 LIS->RemoveMachineInstrFromMaps(*MI);
1676 MI->eraseFromParent();
1677 }
1678
1679 return !ToDelete.empty();
1680}
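
// The shape of the loop above, reduced to a runnable toy: scan the block
// bottom-up, track whether anything since the last config demanded the
// state, and delete configs that are overwritten before being read. The
// Inst type and the block in main() are invented for illustration;
// AVL/VTYPE mutation and LiveIntervals maintenance are deliberately
// elided.
#include <cstdio>
#include <vector>

struct Inst {
  bool IsConfig;   // a vsetvli-like state write
  bool ReadsState; // a vector op that reads VL/VTYPE
};

int main() {
  // Program order: config, vector op, config, config.
  std::vector<Inst> Block = {{true, false}, {false, true},
                             {true, false}, {true, false}};
  bool Demanded = true; // successors may run arbitrary vector code
  bool SeenConfigBelow = false;
  std::vector<size_t> ToDelete;
  for (size_t I = Block.size(); I-- > 0;) {
    if (!Block[I].IsConfig) {
      Demanded |= Block[I].ReadsState;
      continue;
    }
    if (SeenConfigBelow && !Demanded)
      ToDelete.push_back(I); // dead: rewritten before any reader
    SeenConfigBelow = true;
    Demanded = false; // demands now accrue against this config
  }
  for (size_t I : ToDelete)
    std::printf("erase config at index %zu\n", I); // index 2 only
}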
1681
1682void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1683 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1684 MachineInstr &MI = *I++;
1685 if (RISCV::isFaultFirstLoad(MI)) {
1686 Register VLOutput = MI.getOperand(1).getReg();
1687 if (!MRI->use_nodbg_empty(VLOutput))
1688 BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1689 VLOutput);
1690 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1691 MI.getOperand(1).setReg(RISCV::X0);
1692 }
1693 }
1694}
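
// The rewrite insertReadVL performs, sketched on hypothetical MIR (the
// virtual register names and the VLE32FF spelling are illustrative):
//
//   %v, %outvl = PseudoVLE32FF_V_M1 ...   ; fault-only-first load,
//   ...                                   ; %outvl has real uses
//
// becomes
//
//   %v, $x0 = PseudoVLE32FF_V_M1 ...      ; vl result retired to x0
//   %outvl  = PseudoReadVL                ; re-reads VL, which the
//   ...                                   ; faulting load just wrote
//
// If %outvl had no non-debug uses, only the operand rewrite to $x0
// happens and no PseudoReadVL is created.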
1695
1696bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1697 // Skip if the vector extension is not enabled.
1698 ST = &MF.getSubtarget<RISCVSubtarget>();
1699 if (!ST->hasVInstructions())
1700 return false;
1701
1702 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1703
1704 TII = ST->getInstrInfo();
1705 MRI = &MF.getRegInfo();
1706
1707 assert(BlockInfo.empty() && "Expect empty block infos");
1708 BlockInfo.resize(MF.getNumBlockIDs());
1709
1710 bool HaveVectorOp = false;
1711
1712 // Phase 1 - determine how VL/VTYPE are affected by each block.
1713 for (const MachineBasicBlock &MBB : MF) {
1714 VSETVLIInfo TmpStatus;
1715 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1716 // Initial exit state is whatever change we found in the block.
1717 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1718 BBInfo.Exit = TmpStatus;
1719 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1720 << " is " << BBInfo.Exit << "\n");
1721
1722 }
1723
1724 // If we didn't find any instructions that need VSETVLI, we're done.
1725 if (!HaveVectorOp) {
1726 BlockInfo.clear();
1727 return false;
1728 }
1729
1730 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1731 // blocks to the list here, but will also add any that need to be revisited
1732 // during Phase 2 processing.
1733 for (const MachineBasicBlock &MBB : MF) {
1734 WorkList.push(&MBB);
1735 BlockInfo[MBB.getNumber()].InQueue = true;
1736 }
1737 while (!WorkList.empty()) {
1738 const MachineBasicBlock &MBB = *WorkList.front();
1739 WorkList.pop();
1740 computeIncomingVLVTYPE(MBB);
1741 }
1742
1743 // Perform partial redundancy elimination of vsetvli transitions.
1744 for (MachineBasicBlock &MBB : MF)
1745 doPRE(MBB);
1746
1747 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1748 // Phase 2 information to avoid adding vsetvlis before the first vector
1749 // instruction in the block if the VL/VTYPE is satisfied by its
1750 // predecessors.
1751 for (MachineBasicBlock &MBB : MF)
1752 emitVSETVLIs(MBB);
1753
1754 // Insert PseudoReadVL after each VLEFF/VLSEGFF and transfer the vl output
1755 // of the load to the PseudoReadVL.
1756 for (MachineBasicBlock &MBB : MF)
1757 insertReadVL(MBB);
1758
1759 BlockInfo.clear();
1760 return HaveVectorOp;
1761}
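
// The phase 1/2 split above, shrunk to a runnable fixpoint toy: each
// block gets a local "exit effect", then a worklist propagates entry
// states (the meet over predecessors' exits) until nothing changes.
// The int lattice, block graph and meet() here are invented stand-ins
// for VSETVLIInfo and its intersect logic.
#include <cstdio>
#include <queue>
#include <vector>

constexpr int Unknown = -1;  // no information yet
constexpr int Conflict = -2; // predecessors disagree

static int meet(int A, int B) {
  if (A == Unknown) return B;
  if (B == Unknown) return A;
  return A == B ? A : Conflict;
}

int main() {
  // Diamond CFG: 0 -> {1,2} -> 3. Blocks 1 and 2 have no local effect.
  std::vector<int> LocalExit = {7, Unknown, Unknown, Unknown};
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};

  std::vector<int> Entry(4, Unknown), Exit(4, Unknown);
  std::queue<int> Work;
  for (int B = 0; B < 4; ++B) Work.push(B);
  while (!Work.empty()) {
    int B = Work.front();
    Work.pop();
    int In = Unknown;
    for (int P : Preds[B]) In = meet(In, Exit[P]);
    int Out = LocalExit[B] == Unknown ? In : LocalExit[B];
    if (In == Entry[B] && Out == Exit[B])
      continue; // already stable
    Entry[B] = In;
    Exit[B] = Out;
    for (int S = 0; S < 4; ++S) // revisit blocks that read Exit[B]
      for (int P : Preds[S])
        if (P == B) Work.push(S);
  }
  // Both paths reach block 3 in state 7, so its entry is known and a
  // phase-3 analogue could skip a leading configuration there.
  std::printf("entry of block 3: %d\n", Entry[3]); // prints 7
}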
1762
1763/// Returns an instance of the Insert VSETVLI pass.
1764FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1765 return new RISCVInsertVSETVLI();
1766}
1767
1768// Now that all vsetvlis are explicit, go through and do block local
1769// DSE and peephole demanded-fields based transforms. Note that
1770// this *must* be done outside the main dataflow so long as we allow
1771// any cross block analysis within the dataflow. We can't have both
1772// demanded fields based mutation and non-local analysis in the
1773// dataflow at the same time without introducing inconsistencies.
1774bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1775 // Skip if the vector extension is not enabled.
1776 ST = &MF.getSubtarget<RISCVSubtarget>();
1777 if (!ST->hasVInstructions())
1778 return false;
1779 TII = ST->getInstrInfo();
1780 MRI = &MF.getRegInfo();
1781 LIS = &getAnalysis<LiveIntervals>();
1782
1783 bool Changed = false;
1784 for (MachineBasicBlock &MBB : MF)
1785 Changed |= coalesceVSETVLIs(MBB);
1786
1787 return Changed;
1788}
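
// Concretely, the block-local rewrite this pass enables, on hypothetical
// MIR (register names are invented; the 0xd0 immediate encodes
// e32, m1, ta, ma):
//
//   dead $x0 = PseudoVSETVLI %avl, 0xd0, implicit-def $vl, implicit-def $vtype
//   %gpr = PseudoVSETVLI %avl, 0xd0, implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 ...
//
// Nothing between the two configurations demands VL or VTYPE, so the
// earlier, dead one is erased outright:
//
//   %gpr = PseudoVSETVLI %avl, 0xd0, implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 ...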
1789
1790FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
1791 return new RISCVCoalesceVSETVLI();
1792}