//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
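// As a rough sketch (simplified MIR, with most operands elided), phase 3
// turns a lone e32/m1 vector add
//
//   %v = PseudoVADD_VV_M1 ..., %avl, 5 /*e32*/
//
// into
//
//   dead $x0 = PseudoVSETVLI %avl, 208 /*e32, m1, ta, ma*/,
//       implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 ..., $noreg, 5 /*e32*/, implicit $vl, implicit $vtype
//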
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2,               // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0                 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
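/// For example (a sketch): if Used demands only SEW as
/// SEWGreaterThanOrEqual plus the SEW/LMUL ratio, then moving from
/// {e32, m1} to {e64, m2} is compatible -- SEW grew (32 -> 64) and the
/// ratio stayed at 32 -- while moving to {e16, mf2} is not, since SEW
/// shrank even though the ratio is still 32.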
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }
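
  // For example, vle32.v always loads 32-bit elements, so it behaves the
  // same under {e32, m1} and {e16, mf2}: both have SEW/LMUL ratio 32, and
  // its effective EMUL (EEW/SEW * LMUL = 32/32 * m1 = 32/16 * mf2 = m1) is
  // unchanged.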

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }
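
  // E.g. a vmv.s.x selected as e32 with an undef passthru writes only
  // element 0 and may clobber everything else, so it can execute under a
  // pre-existing {e64, m4, ta, ma} configuration without a new vsetvli.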

  // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the
    // same semantically as vmv.s.x. This is particularly useful since we don't
    // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in
    // its place. Since a splat is non-constant time in LMUL, we do need to be
    // careful to not increase the number of active vector registers (unlike for
    // vmv.s.x.)
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }
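
  // For example, {e8, mf8} and {e64, m1} both have SEW/LMUL ratio 64, so on
  // a VLEN=128 machine both give VLMAX = VLEN / ratio = 2 and therefore map
  // any given AVL to the same VL.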

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
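
  // For example, meeting {AVL=%x, e32, m1} with {AVL=%x, e16, mf2} yields a
  // ratio-only state (both ratios are 32, so VLMAX -- and hence VL -- still
  // agree), while meeting it with {AVL=%x, e16, m1} (ratio 16) yields
  // Unknown.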

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
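
// For example, with VLEN=128 and SEW=32: LMUL=m2 gives
// VLMAX = (128 * 2) / 32 = 8, while LMUL=mf2 gives VLMAX = (128 / 2) / 32 = 2.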

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}
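
// For example, if the incoming state is {e32, m2} (ratio 16) and the
// instruction wants e16 but demands neither LMUL nor the ratio, we pick
// {e16, m1} (also ratio 16) so VLMAX -- and hence VL -- is unchanged, and a
// cheaper 'vsetvli x0, x0' (or no vsetvli at all) may suffice.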

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
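
// As a sketch of the case this catches: if every predecessor ends in
//   %vl.N = PseudoVSETVLI %avl.N, <vtype>
// and this block begins with
//   %avl = PHI [ %vl.1, %pred1 ], [ %vl.2, %pred2 ]
// followed by a vector op using %avl with that same <vtype>, then VL/VTYPE
// are already correct on entry along every edge, so no vsetvli is required.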

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvli in a
/// single block loop when it could execute once in the preheader instead.
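/// For example (a sketch), for a loop like:
///   preheader:
///     ...
///   loop:
///     vsetvli zero, 4, e32, m1, ta, ma
///     vadd.vv ...
///     bnez ..., loop
/// the vsetvli can instead be placed once at the end of the preheader,
/// provided the loop body leaves VL/VTYPE unchanged.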
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
1638
1639void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1640 MachineInstr *NextMI = nullptr;
1641 // We can have arbitrary code in successors, so VL and VTYPE
1642 // must be considered demanded.
1643 DemandedFields Used;
1644 Used.demandVL();
1645 Used.demandVTYPE();
1647
1648 auto dropAVLUse = [&](MachineOperand &MO) {
1649 if (!MO.isReg() || !MO.getReg().isVirtual())
1650 return;
1651 Register OldVLReg = MO.getReg();
1652 MO.setReg(RISCV::NoRegister);
1653
1654 if (LIS)
1655 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1656
1657 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1658 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1659 MRI->use_nodbg_empty(OldVLReg))
1660 ToDelete.push_back(VLOpDef);
1661 };
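// Editor's note (illustrative, hypothetical vregs): if the AVL was produced by
// an add-immediate whose only use was the vsetvli being rewritten, e.g.
//   %avl = ADDI %n, -1
//   ... vsetvli %vl, %avl, ...   ; AVL use dropped here
// then the ADDI becomes dead and is queued on ToDelete for removal once
// iteration over the block finishes.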
1662
1663 for (MachineInstr &MI :
1664 make_early_inc_range(reverse(MBB))) {
1665
1666 if (!isVectorConfigInstr(MI)) {
1667 Used.doUnion(getDemanded(MI, ST));
1668 if (MI.isCall() || MI.isInlineAsm() ||
1669 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1670 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1671 NextMI = nullptr;
1672 continue;
1673 }
1674
1675 if (!MI.getOperand(0).isDead())
1676 Used.demandVL();
1677
1678 if (NextMI) {
1679 if (!Used.usedVL() && !Used.usedVTYPE()) {
1680 dropAVLUse(MI.getOperand(1));
1681 if (LIS)
1682 LIS->RemoveMachineInstrFromMaps(MI);
1683 MI.eraseFromParent();
1684 NumCoalescedVSETVL++;
1685 // Leave NextMI unchanged
1686 continue;
1687 }
1688
1689 if (canMutatePriorConfig(MI, *NextMI, Used)) {
1690 if (!isVLPreservingConfig(*NextMI)) {
1691 Register DefReg = NextMI->getOperand(0).getReg();
1692
1693 MI.getOperand(0).setReg(DefReg);
1694 MI.getOperand(0).setIsDead(false);
1695
1696 // The def of DefReg moved to MI, so extend the LiveInterval up to
1697 // it.
1698 if (DefReg.isVirtual() && LIS) {
1699 LiveInterval &DefLI = LIS->getInterval(DefReg);
1700 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1701 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1702 LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1703 DefLI.addSegment(S);
1704 DefVNI->def = MISlot;
1705 // Mark DefLI as spillable if it was previously unspillable
1706 DefLI.setWeight(0);
1707
1708 // DefReg may have had no uses, in which case we need to shrink
1709 // the LiveInterval up to MI.
1710 LIS->shrinkToUses(&DefLI);
1711 }
1712
1713 dropAVLUse(MI.getOperand(1));
1714 if (NextMI->getOperand(1).isImm())
1715 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1716 else
1717 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1718 false);
1719
1720 MI.setDesc(NextMI->getDesc());
1721 }
1722 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1723
1724 dropAVLUse(NextMI->getOperand(1));
1725 if (LIS)
1726 LIS->RemoveMachineInstrFromMaps(*NextMI);
1727 NextMI->eraseFromParent();
1728 NumCoalescedVSETVL++;
1729 // fallthrough
1730 }
1731 }
1732 NextMI = &MI;
1733 Used = getDemanded(MI, ST);
1734 }
1735
1736 // Loop over the dead AVL values, and delete them now. This has
1737 // to be outside the above loop to avoid invalidating iterators.
1738 for (auto *MI : ToDelete) {
1739 if (LIS) {
1740 LIS->removeInterval(MI->getOperand(0).getReg());
1741 LIS->RemoveMachineInstrFromMaps(*MI);
1742 }
1743 MI->eraseFromParent();
1744 }
1745}
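// Editor's illustration (hypothetical operands): the simplest win is a config
// whose entire effect is clobbered before anything observes it:
//
//   vsetvli zero, a0, e32, m1, ta, ma   ; nothing reads VL/VTYPE from here...
//   vsetvli zero, a1, e8, m8, ta, ma    ; ...before this full redefinition
//   vle8.v v8, (a2)
//
// Walking the block bottom-up, no demanded field survives to the first
// vsetvli, so it is erased via the !usedVL() && !usedVTYPE() path above.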
1746
1747void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1748 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1749 MachineInstr &MI = *I++;
1750 if (RISCV::isFaultFirstLoad(MI)) {
1751 Register VLOutput = MI.getOperand(1).getReg();
1752 assert(VLOutput.isVirtual());
1753 if (!MI.getOperand(1).isDead()) {
1754 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1755 TII->get(RISCV::PseudoReadVL), VLOutput);
1756 // Move the LiveInterval's definition down to PseudoReadVL.
1757 if (LIS) {
1758 SlotIndex NewDefSI =
1759 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1760 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1761 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1762 DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1763 DefVNI->def = NewDefSI;
1764 }
1765 }
1766 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1767 MI.getOperand(1).setReg(RISCV::X0);
1768 }
1769 }
1770}
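// Editor's illustration (pseudo-MIR sketch, opcode name abbreviated): a
// fault-only-first load whose vl result is live
//
//   %vd, %vl = PseudoVLE32FF ...
//
// is rewritten so the pseudo's vl output is retired to x0 and the live value
// is instead read back from the VL CSR:
//
//   %vd, $x0 = PseudoVLE32FF ...
//   %vl = PseudoReadVL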
1771
1772bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1773 // Skip if the vector extension is not enabled.
1774 ST = &MF.getSubtarget<RISCVSubtarget>();
1775 if (!ST->hasVInstructions())
1776 return false;
1777
1778 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1779
1780 TII = ST->getInstrInfo();
1781 MRI = &MF.getRegInfo();
1782 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
1783 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
1784
1785 assert(BlockInfo.empty() && "Expect empty block infos");
1786 BlockInfo.resize(MF.getNumBlockIDs());
1787
1788 bool HaveVectorOp = false;
1789
1790 // Phase 1 - determine how VL/VTYPE are affected by each block.
1791 for (const MachineBasicBlock &MBB : MF) {
1792 VSETVLIInfo TmpStatus;
1793 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1794 // Initial exit state is whatever change we found in the block.
1795 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1796 BBInfo.Exit = TmpStatus;
1797 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1798 << " is " << BBInfo.Exit << "\n");
1799
1800 }
1801
1802 // If we didn't find any instructions that need VSETVLI, we're done.
1803 if (!HaveVectorOp) {
1804 BlockInfo.clear();
1805 return false;
1806 }
1807
1808 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1809 // blocks to the list here, but will also add any that need to be revisited
1810 // during Phase 2 processing.
1811 for (const MachineBasicBlock &MBB : MF) {
1812 WorkList.push(&MBB);
1813 BlockInfo[MBB.getNumber()].InQueue = true;
1814 }
1815 while (!WorkList.empty()) {
1816 const MachineBasicBlock &MBB = *WorkList.front();
1817 WorkList.pop();
1818 computeIncomingVLVTYPE(MBB);
1819 }
1820
1821 // Perform partial redundancy elimination of vsetvli transitions.
1822 for (MachineBasicBlock &MBB : MF)
1823 doPRE(MBB);
1824
1825 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1826 // Phase 2 information to avoid adding vsetvlis before the first vector
1827 // instruction in the block if the VL/VTYPE is satisfied by its
1828 // predecessors.
1829 for (MachineBasicBlock &MBB : MF)
1830 emitVSETVLIs(MBB);
1831
1832 // Now that all vsetvlis are explicit, go through and do block local
1833 // DSE and peephole transforms based on demanded fields. Note that
1834 // this *must* be done outside the main dataflow so long as we allow
1835 // any cross block analysis within the dataflow. We can't have both
1836 // demanded fields based mutation and non-local analysis in the
1837 // dataflow at the same time without introducing inconsistencies.
1838 for (MachineBasicBlock &MBB : MF)
1839 coalesceVSETVLIs(MBB);
1840
1841 // Insert PseudoReadVL after each VLEFF/VLSEGFF and redirect its vl output
1842 // to the PseudoReadVL.
1843 for (MachineBasicBlock &MBB : MF)
1844 insertReadVL(MBB);
1845
1846 BlockInfo.clear();
1847 return HaveVectorOp;
1848}
1849
1850/// Returns an instance of the Insert VSETVLI pass.
1851 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1852 return new RISCVInsertVSETVLI();
1853}
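// Editor's note (sketch, not from this file): the factory above is what the
// RISC-V target invokes when building its codegen pipeline, conventionally
// from a TargetPassConfig hook along the lines of:
//
//   void RISCVPassConfig::addPreRegAlloc() {
//     addPass(createRISCVInsertVSETVLIPass());
//   }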