//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
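// As an illustrative sketch (not taken from an actual test), a block holding
//
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, 4, 5 /*e32*/, 0
//
// would, after phase 3, carry an explicit configuration before it:
//
//   dead $x0 = PseudoVSETIVLI 4, 208 /*e32, m1, ta, ma*/,
//       implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, 4, 5, 0, implicit $vl,
//       implicit $vtype
//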
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVType(
    DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
    cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
             "vill is cleared"),
    cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static unsigned getVecPolicyOpNum(const MachineInstr &MI) {
  return RISCVII::getVecPolicyOpNum(MI.getDesc());
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthrough
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
static bool isVectorCopy(const TargetRegisterInfo *TRI,
                         const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
         RISCVRegisterInfo::isRVVRegClass(
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqualAndLessThan64 =
        2, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
  // If this is true, we demand that VTYPE is set to some legal state, i.e. that
  // vill is unset.
  bool VILL = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
    VILL = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
    VILL |= B.VILL;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy << ", ";
    OS << "VILL=" << VILL;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
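
// An illustrative example of the above: if only SEWLMULRatio is demanded,
// then moving from e32/m1 (ratio 32) to e16/mf2 (also ratio 32) is
// compatible, while moving to e16/m1 (ratio 16) is not.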

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }
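
  // Illustrative example (not from the source): a vle32.v executed under
  // e32/m1 gives the same result under e16/mf2, since its effective EMUL =
  // (EEW/SEW) * LMUL = (32/16) * 1/2 = 1 either way; only the ratio matters.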

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored..
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
  if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    // * We avoid increasing vl if the subtarget has +vl-dependent-latency
    if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
        !ST->hasVLDependentLatency()) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
    // semantically the same as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we do
    // need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x.)
    if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
        !ST->hasVLDependentLatency()) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }
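
  // Illustrative example of the splat case above: "vmv.v.i v8, 5" with VL=1
  // and an undefined passthru writes a single element, so it is compatible
  // with any state whose SEW >= the original SEW and whose LMUL <= m1.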

  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
  // MIR level though we don't encode the element type, and it gives the same
  // result whatever the SEW may be.
  //
  // However it does need valid SEW, i.e. vill must be cleared. The entry to a
  // function, calls and inline assembly may all set it, so make sure we clear
  // it for whole register copies. Do this by leaving VILL demanded.
  if (isVectorCopy(ST->getRegisterInfo(), MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEW = DemandedFields::SEWNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }
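
  // Illustrative example: a vmv1r.v emitted right after a call could execute
  // with vill set (the callee may have clobbered vtype); keeping VILL demanded
  // forces e.g. "vsetivli zero, 1, e8, m1, ta, ma" to be inserted first, which
  // clears vill without otherwise constraining the copy.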

  if (RISCVInstrInfo::isVExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    // TODO: LMUL can be any larger value (without cost)
    Res.TailPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(const VSETVLIInfo &Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVVType::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }
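
  // Illustrative example: with VLEN=128, e8/mf8 and e64/m1 share the SEW/LMUL
  // ratio 64, so both give VLMAX = VLEN/64 = 2 and therefore map any given
  // AVL to the same VL.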

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
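
  // Illustrative example: intersecting {AVLImm=4, e32/m1} with
  // {AVLImm=4, e16/mf2} yields {AVLImm=4} with SEWLMULRatioOnly set, since
  // the AVLs match and both VTYPEs have ratio 32, but the full VTYPEs differ.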

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", ";

    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    OS << "VLMul=";
    if (Fractional)
      OS << "mf";
    else
      OS << "m";
    OS << LMul << ", "
       << "SEW=e" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      Register AVLReg = MI.getOperand(1).getReg();
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVVType::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
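
// Worked example (illustrative): computeVLMAX(128, 32, m2) = (128 * 2) / 32
// = 8, while computeVLMAX(128, 32, mf2) = (128 / 2) / 32 = 2.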

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(getVecPolicyOpNum(MI));
      uint64_t Policy = Op.getImm();
      assert(Policy <=
                 (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
    }

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
           RISCVInstrInfo::isVExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI =
              BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE())
                  .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    const VNInfo *CurVNI = Info.getAVLVNInfo();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != CurVNI) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def);
        MachineBasicBlock::iterator II;
        if (CurVNI->isPHIDef())
          II = MBB->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(CurVNI->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                           .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
                                  const VSETVLIInfo &NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}
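
// Illustrative example for adjustIncoming: if the previous state is e32/m2
// (ratio 16) and the incoming instruction demands only SEW=e16, we pick
// LMUL=m1 so the ratio stays 16 and VLMAX (and thus VL) is unchanged.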

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (isVectorCopy(ST->getRegisterInfo(), MI) &&
      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
    // be coalesced into another vsetvli since we won't demand any fields.
    VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
    NewInfo.setAVLImm(1);
    NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
    Info = NewInfo;
    return;
  }

  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
        RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
        isVectorCopy(ST->getRegisterInfo(), MI))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
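//
// Illustrative example (MIR-like sketch):
//
//   bb.1:
//     %avl:gpr = PHI [ %vl1, %bb.0 ], [ %vl2, %bb.2 ]
//     ...
//
// where %vl1 and %vl2 are the GPR results of the last vsetvlis in bb.0 and
// bb.2, and those vsetvlis match the respective block exit states; then no
// vsetvli is needed at the use of %avl.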
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    if (EnsureWholeVectorRegisterMoveValidVType &&
        isVectorCopy(ST->getRegisterInfo(), MI)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isInlineAsm()) {
      MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ true,
                                              /*isImp*/ true));
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ true,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
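///
/// Illustrative example: a single-block loop whose body starts each iteration
/// with "vsetivli zero, 4, e32, m1, ta, ma" can instead execute it once at
/// the end of the preheader, since the backedge predecessor already provides
/// that state and only the preheader's exit state is unknown.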
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
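//
// Illustrative example: given back-to-back configurations with no intervening
// user demanding VL or a distinguishing VTYPE field,
//
//   dead %0 = PseudoVSETVLI %avl, e32m1   ; PrevMI
//   dead %1 = PseudoVSETVLI %avl, e32m2   ; MI
//
// PrevMI can be rewritten to MI's VTYPE (the AVL definition is the same),
// making MI redundant so it can be coalesced away.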
1617bool RISCVInsertVSETVLI::canMutatePriorConfig(
1618 const MachineInstr &PrevMI, const MachineInstr &MI,
1619 const DemandedFields &Used) const {
1620 // If the VL values aren't equal, return false if either a) the former is
1621 // demanded, or b) we can't rewrite the former to be the later for
1622 // implementation reasons.
1623 if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
1624 if (Used.VLAny)
1625 return false;
1626
1627 if (Used.VLZeroness) {
1628 if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
1629 return false;
1630 if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1631 LIS))
1632 return false;
1633 }
1634
1635 auto &AVL = MI.getOperand(1);
1636
1637 // If the AVL is a register, we need to make sure its definition is the same
1638 // at PrevMI as it was at MI.
1639 if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
1640 VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
1641 VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
1642 if (!VNI || !PrevVNI || VNI != PrevVNI)
1643 return false;
1644 }
1645 }
1646
1647 assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
1648 auto PriorVType = PrevMI.getOperand(2).getImm();
1649 auto VType = MI.getOperand(2).getImm();
1650 return areCompatibleVTYPEs(PriorVType, VType, Used);
1651}
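// Hypothetical example (virtual registers and vtypes assumed): given
//   PrevMI: dead %0 = PseudoVSETVLI %avl, <e32, m1, tu, ma>
//   MI:     dead %1 = PseudoVSETVLI %avl, <e32, m1, ta, ma>
// where the code in between demands neither VL nor the tail-policy bit, the
// AVL value numbers match and the two VTYPEs are compatible for the demanded
// fields, so PrevMI may be rewritten to MI's configuration and MI erased.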
1652
1653void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1654 MachineInstr *NextMI = nullptr;
1655 // We can have arbitrary code in successors, so VL and VTYPE
1656 // must be considered demanded.
1657 DemandedFields Used;
1658 Used.demandVL();
1659 Used.demandVTYPE();
1660 SmallVector<MachineInstr *> ToDelete;
1661 auto dropAVLUse = [&](MachineOperand &MO) {
1662 auto dropAVLUse = [&](MachineOperand &MO) {
1663 if (!MO.isReg() || !MO.getReg().isVirtual())
1664 return;
1665 Register OldVLReg = MO.getReg();
1666 MO.setReg(RISCV::NoRegister);
1667
1668 if (LIS)
1669 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1670
1671 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1672 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1673 MRI->use_nodbg_empty(OldVLReg))
1674 ToDelete.push_back(VLOpDef);
1675 };
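// Illustrative case (assumed, not from the source): if the AVL was produced
// by "%avl = ADDI %x, 8" and a coalesced vsetvli was its last non-debug
// user, the ADDI is queued on ToDelete here and erased after the walk.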
1676
1677 for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
1678
1679 if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
1680 Used.doUnion(getDemanded(MI, ST));
1681 if (MI.isCall() || MI.isInlineAsm() ||
1682 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1683 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1684 NextMI = nullptr;
1685 continue;
1686 }
1687
1688 if (!MI.getOperand(0).isDead())
1689 Used.demandVL();
1690
1691 if (NextMI) {
1692 if (!Used.usedVL() && !Used.usedVTYPE()) {
1693 dropAVLUse(MI.getOperand(1));
1694 if (LIS)
1695 LIS->RemoveMachineInstrFromMaps(MI);
1696 MI.eraseFromParent();
1697 NumCoalescedVSETVL++;
1698 // Leave NextMI unchanged
1699 continue;
1700 }
1701
1702 if (canMutatePriorConfig(MI, *NextMI, Used)) {
1703 if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
1704 Register DefReg = NextMI->getOperand(0).getReg();
1705
1706 MI.getOperand(0).setReg(DefReg);
1707 MI.getOperand(0).setIsDead(false);
1708
1709 // Move the AVL from NextMI to MI
1710 dropAVLUse(MI.getOperand(1));
1711 if (NextMI->getOperand(1).isImm())
1712 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1713 else
1714 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1715 false);
1716 dropAVLUse(NextMI->getOperand(1));
1717
1718 // The def of DefReg moved to MI, so extend the LiveInterval up to
1719 // it.
1720 if (DefReg.isVirtual() && LIS) {
1721 LiveInterval &DefLI = LIS->getInterval(DefReg);
1722 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1723 SlotIndex NextMISlot =
1724 LIS->getInstructionIndex(*NextMI).getRegSlot();
1725 VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
1726 LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
1727 DefLI.addSegment(S);
1728 DefVNI->def = MISlot;
1729 // Mark DefLI as spillable if it was previously unspillable
1730 DefLI.setWeight(0);
1731
1732 // DefReg may have had no uses, in which case we need to shrink
1733 // the LiveInterval up to MI.
1734 LIS->shrinkToUses(&DefLI);
1735 }
1736
1737 MI.setDesc(NextMI->getDesc());
1738 }
1739 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1740
1741 dropAVLUse(NextMI->getOperand(1));
1742 if (LIS)
1743 LIS->RemoveMachineInstrFromMaps(*NextMI);
1744 NextMI->eraseFromParent();
1745 NumCoalescedVSETVL++;
1746 // fallthrough
1747 }
1748 }
1749 NextMI = &MI;
1750 Used = getDemanded(MI, ST);
1751 }
1752
1753 // Loop over the dead AVL values, and delete them now. This has
1754 // to be outside the above loop to avoid invalidating iterators.
1755 for (auto *MI : ToDelete) {
1756 if (LIS) {
1757 LIS->removeInterval(MI->getOperand(0).getReg());
1758 LIS->RemoveMachineInstrFromMaps(*MI);
1759 }
1760 MI->eraseFromParent();
1761 }
1762}
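// A minimal before/after sketch of the coalescing above (registers and
// vtypes assumed for illustration). With nothing between the two configs
// demanding VL or VTYPE, the earlier vsetvli with a dead result is erased:
//
//   ; before                              ; after
//   dead %0 = PseudoVSETVLI %avl, e32m1   dead %1 = PseudoVSETVLI %avl, e32m2
//   dead %1 = PseudoVSETVLI %avl, e32m2   vadd.vv ...
//   vadd.vv ...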
1763
1764void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1765 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1766 MachineInstr &MI = *I++;
1767 if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
1768 Register VLOutput = MI.getOperand(1).getReg();
1769 assert(VLOutput.isVirtual());
1770 if (!MI.getOperand(1).isDead()) {
1771 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1772 TII->get(RISCV::PseudoReadVL), VLOutput);
1773 // Move the LiveInterval's definition down to PseudoReadVL.
1774 if (LIS) {
1775 SlotIndex NewDefSI =
1776 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1777 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1778 LiveRange::Segment *DefSeg = DefLI.getSegmentContaining(NewDefSI);
1779 VNInfo *DefVNI = DefLI.getVNInfoAt(DefSeg->start);
1780 DefLI.removeSegment(DefSeg->start, NewDefSI);
1781 DefVNI->def = NewDefSI;
1782 }
1783 }
1784 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1785 MI.getOperand(1).setReg(RISCV::X0);
1786 MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
1787 }
1788 }
1789}
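// Sketch of the expansion above in rough pseudo-MIR (operand details elided
// and names assumed): a fault-only-first load whose vl result is live
//   %vd, %vl = PseudoVLE32FF_V ...
// becomes
//   %vd, dead $x0 = PseudoVLE32FF_V ..., implicit-def $vl
//   %vl = PseudoReadVL implicit $vl
// so later passes read the trimmed VL through PseudoReadVL rather than the
// load's own result operand.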
1790
1791bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1792 // Skip if the vector extension is not enabled.
1793 ST = &MF.getSubtarget<RISCVSubtarget>();
1794 if (!ST->hasVInstructions())
1795 return false;
1796
1797 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1798
1799 TII = ST->getInstrInfo();
1800 MRI = &MF.getRegInfo();
1801 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
1802 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
1803
1804 assert(BlockInfo.empty() && "Expect empty block infos");
1805 BlockInfo.resize(MF.getNumBlockIDs());
1806
1807 bool HaveVectorOp = false;
1808
1809 // Phase 1 - determine how VL/VTYPE are affected by each block.
1810 for (const MachineBasicBlock &MBB : MF) {
1811 VSETVLIInfo TmpStatus;
1812 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1813 // Initial exit state is whatever change we found in the block.
1814 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1815 BBInfo.Exit = TmpStatus;
1816 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1817 << " is " << BBInfo.Exit << "\n");
1818
1819 }
1820
1821 // If we didn't find any instructions that need VSETVLI, we're done.
1822 if (!HaveVectorOp) {
1823 BlockInfo.clear();
1824 return false;
1825 }
1826
1827 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1828 // blocks to the list here, but will also add any that need to be revisited
1829 // during Phase 2 processing.
1830 for (const MachineBasicBlock &MBB : MF) {
1831 WorkList.push(&MBB);
1832 BlockInfo[MBB.getNumber()].InQueue = true;
1833 }
1834 while (!WorkList.empty()) {
1835 const MachineBasicBlock &MBB = *WorkList.front();
1836 WorkList.pop();
1837 computeIncomingVLVTYPE(MBB);
1838 }
1839
1840 // Perform partial redundancy elimination of vsetvli transitions.
1841 for (MachineBasicBlock &MBB : MF)
1842 doPRE(MBB);
1843
1844 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1845 // Phase 2 information to avoid adding vsetvlis before the first vector
1846 // instruction in the block if the VL/VTYPE is satisfied by its
1847 // predecessors.
1848 for (MachineBasicBlock &MBB : MF)
1849 emitVSETVLIs(MBB);
1850
1851 // Now that all vsetvlis are explicit, go through and do block local
1852 // DSE and peephole based demanded fields based transforms. Note that
1853 // this *must* be done outside the main dataflow so long as we allow
1854 // any cross block analysis within the dataflow. We can't have both
1855 // demanded fields based mutation and non-local analysis in the
1856 // dataflow at the same time without introducing inconsistencies.
1857 // We're visiting blocks from the bottom up because a VSETVLI in the
1858 // earlier block might become dead when its uses in later blocks are
1859 // optimized away.
1860 for (MachineBasicBlock *MBB : post_order(&MF))
1861 coalesceVSETVLIs(*MBB);
1862
1863 // Insert PseudoReadVL after each VLEFF/VLSEGFF and replace the vl output
1864 // of the VLEFF/VLSEGFF with it.
1865 for (MachineBasicBlock &MBB : MF)
1866 insertReadVL(MBB);
1867
1868 BlockInfo.clear();
1869 return HaveVectorOp;
1870}
1871
1872/// Returns an instance of the Insert VSETVLI pass.
1873FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1874 return new RISCVInsertVSETVLI();
1875}