//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
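//
// For example (a simplified sketch, not exact MIR), given a vector add pseudo
// that demands e32/m1 with AVL %avl, phase 3 emits
//
//   dead $x0 = PseudoVSETVLI %avl, <e32, m1, ta, ma>, implicit-def $vl,
//              implicit-def $vtype
//
// immediately before it, unless the phase 2 dataflow proves the incoming
// VL/VTYPE state already satisfies every field the instruction demands.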

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVType(
    DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
    cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
             "vill is cleared"),
    cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
static bool isVectorCopy(const TargetRegisterInfo *TRI,
                         const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
         RISCVRegisterInfo::isRVVRegClass(
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqualAndLessThan64 =
        2, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
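  // Only the SEW/LMUL *ratio* (i.e. VLMAX) is demanded, not the exact values.
  // For example (illustrative), e32/m1 and e16/mf2 both have ratio 32, so an
  // instruction demanding only the ratio cannot observe a switch between them.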
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
  // If this is true, we demand that VTYPE is set to some legal state, i.e. that
  // vill is unset.
  bool VILL = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
    VILL = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
    VILL |= B.VILL;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy << ", ";
    OS << "VILL=" << VILL;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }
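  // For example (illustrative), a vle32.v behaves identically under e32/m1
  // and e16/mf2: EEW=32 comes from the opcode, and
  // EMUL = (EEW / SEW) * LMUL = (32 / 32) * 1 = (32 / 16) * 1/2 = m1 either
  // way, because the SEW/LMUL ratio (and hence VLMAX) is unchanged.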

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored..
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
  if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    // * We avoid increasing vl if the subtarget has +vl-dependent-latency
    if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
        !ST->hasVLDependentLatency()) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
    // semantically the same as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
    // in its place. Since a splat is non-constant time in LMUL, we do need to
    // be careful to not increase the number of active vector registers (unlike
    // for vmv.s.x.)
    if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
        VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
        !ST->hasVLDependentLatency()) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
          !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
  // MIR level though we don't encode the element type, and it gives the same
  // result whatever the SEW may be.
  //
  // However it does need valid SEW, i.e. vill must be cleared. The entry to a
  // function, calls and inline assembly may all set it, so make sure we clear
  // it for whole register copies. Do this by leaving VILL demanded.
  if (isVectorCopy(ST->getRegisterInfo(), MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEW = DemandedFields::SEWNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVInstrInfo::isVExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    // TODO: LMUL can be any larger value (without cost)
    Res.TailPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(const VSETVLIInfo &Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVVType::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
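  // For example (illustrative), with VLEN=128 both e32/m1 and e16/mf2 have
  // SEW/LMUL ratio 32, hence VLMAX = 4 in both, and the VL-preserving
  // "vsetvli x0, x0, vtype" form can switch between them without changing VL.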
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", ";

    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    OS << "VLMul=";
    if (Fractional)
      OS << "mf";
    else
      OS << "m";
    OS << LMul << ", "
       << "SEW=e" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      Register AVLReg = MI.getOperand(1).getReg();
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

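// Computes VLMAX = (VLEN * LMUL) / SEW. For example (illustrative numbers),
// with VLEN=128 and SEW=32: LMUL=m2 gives (128 * 2) / 32 = 8, while LMUL=mf2
// gives (128 / 2) / 32 = 2.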
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVVType::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <=
                 (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
    }

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
           RISCVInstrInfo::isVExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI =
              BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE())
                  .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    const VNInfo *CurVNI = Info.getAVLVNInfo();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != CurVNI) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def);
        MachineBasicBlock::iterator II;
        if (CurVNI->isPHIDef())
          II = MBB->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(CurVNI->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                           .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
                                  const VSETVLIInfo &NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (isVectorCopy(ST->getRegisterInfo(), MI) &&
      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
    // be coalesced into another vsetvli since we won't demand any fields.
    VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
    NewInfo.setAVLImm(1);
    NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
    Info = NewInfo;
    return;
  }

  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match, or allows use of a cheaper AVL-preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the "vsetvli x0, x0, vtype"
  // variant, so we avoid the transform to prevent extending the live range of
  // an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different from the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault-only-first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
        RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
        isVectorCopy(ST->getRegisterInfo(), MI))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    if (EnsureWholeVectorRegisterMoveValidVType &&
        isVectorCopy(ST->getRegisterInfo(), MI)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isInlineAsm()) {
      MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ true,
                                              /*isImp*/ true));
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ true,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
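/// For example (illustrative), a single-block loop whose body starts with
/// "vsetivli zero, 4, e32, m1, ta, ma" can have that vsetivli hoisted into
/// the loop preheader; the copy inside the loop then becomes fully redundant
/// on the backedge and is removed by phase 3.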
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldExit);
}
1610
1611// Return true if we can mutate PrevMI to match MI without changing any the
1612// fields which would be observed.
1613bool RISCVInsertVSETVLI::canMutatePriorConfig(
1614 const MachineInstr &PrevMI, const MachineInstr &MI,
1615 const DemandedFields &Used) const {
1616 // If the VL values aren't equal, return false if either a) the former is
1617 // demanded, or b) we can't rewrite the former to be the later for
1618 // implementation reasons.
1619 if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
1620 if (Used.VLAny)
1621 return false;
1622
1623 if (Used.VLZeroness) {
1624 if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
1625 return false;
1626 if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1627 LIS))
1628 return false;
1629 }
1630
1631 auto &AVL = MI.getOperand(1);
1632
1633 // If the AVL is a register, we need to make sure its definition is
1634 // the same at PrevMI as it was at MI.
1635 if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
1636 VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
1637 VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
1638 if (!VNI || !PrevVNI || VNI != PrevVNI)
1639 return false;
1640 }
1641 }
1642
1643 assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
1644 auto PriorVType = PrevMI.getOperand(2).getImm();
1645 auto VType = MI.getOperand(2).getImm();
1646 return areCompatibleVTYPEs(PriorVType, VType, Used);
1647}
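// For instance (illustrative only), if nothing between the two configs
// observes VL or any VTYPE field:
//   vsetvli zero, a0, e8, mf8, ta, ma   ; PrevMI, state never observed
//   vsetvli zero, a0, e32, m1, ta, ma   ; MI
// the check succeeds, and coalesceVSETVLIs below rewrites PrevMI to MI's
// VTYPE and erases MI.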
1648
1649void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1650 MachineInstr *NextMI = nullptr;
1651 // We can have arbitrary code in successors, so VL and VTYPE
1652 // must be considered demanded.
1653 DemandedFields Used;
1654 Used.demandVL();
1655 Used.demandVTYPE();
1656 SmallVector<MachineInstr *> ToDelete;
1657
1658 auto dropAVLUse = [&](MachineOperand &MO) {
1659 if (!MO.isReg() || !MO.getReg().isVirtual())
1660 return;
1661 Register OldVLReg = MO.getReg();
1662 MO.setReg(RISCV::NoRegister);
1663
1664 if (LIS)
1665 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1666
1667 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1668 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1669 MRI->use_nodbg_empty(OldVLReg))
1670 ToDelete.push_back(VLOpDef);
1671 };
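// E.g. (hypothetical): if the AVL came from "%avl = ADDI %n, -1" solely
// to feed this vsetvli, clearing the use leaves the ADDI dead; it is
// queued on ToDelete and erased after the main walk below.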
1672
1673 for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
1674
1675 if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
1676 Used.doUnion(getDemanded(MI, ST));
1677 if (MI.isCall() || MI.isInlineAsm() ||
1678 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1679 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1680 NextMI = nullptr;
1681 continue;
1682 }
1683
1684 if (!MI.getOperand(0).isDead())
1685 Used.demandVL();
1686
1687 if (NextMI) {
1688 if (!Used.usedVL() && !Used.usedVTYPE()) {
1689 dropAVLUse(MI.getOperand(1));
1690 if (LIS)
1691 LIS->RemoveMachineInstrFromMaps(MI);
1692 MI.eraseFromParent();
1693 NumCoalescedVSETVL++;
1694 // Leave NextMI unchanged
1695 continue;
1696 }
1697
1698 if (canMutatePriorConfig(MI, *NextMI, Used)) {
1699 if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
1700 Register DefReg = NextMI->getOperand(0).getReg();
1701
1702 MI.getOperand(0).setReg(DefReg);
1703 MI.getOperand(0).setIsDead(false);
1704
1705 // Move the AVL from NextMI to MI
1706 dropAVLUse(MI.getOperand(1));
1707 if (NextMI->getOperand(1).isImm())
1708 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1709 else
1710 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1711 false);
1712 dropAVLUse(NextMI->getOperand(1));
1713
1714 // The def of DefReg moved to MI, so extend the LiveInterval up to
1715 // it.
1716 if (DefReg.isVirtual() && LIS) {
1717 LiveInterval &DefLI = LIS->getInterval(DefReg);
1718 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1719 SlotIndex NextMISlot =
1720 LIS->getInstructionIndex(*NextMI).getRegSlot();
1721 VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
1722 LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
1723 DefLI.addSegment(S);
1724 DefVNI->def = MISlot;
1725 // Mark DefLI as spillable if it was previously unspillable
1726 DefLI.setWeight(0);
1727
1728 // DefReg may have had no uses, in which case we need to shrink
1729 // the LiveInterval up to MI.
1730 LIS->shrinkToUses(&DefLI);
1731 }
1732
1733 MI.setDesc(NextMI->getDesc());
1734 }
1735 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1736
1737 dropAVLUse(NextMI->getOperand(1));
1738 if (LIS)
1739 LIS->RemoveMachineInstrFromMaps(*NextMI);
1740 NextMI->eraseFromParent();
1741 NumCoalescedVSETVL++;
1742 // fallthrough
1743 }
1744 }
1745 NextMI = &MI;
1746 Used = getDemanded(MI, ST);
1747 }
1748
1749 // Loop over the dead AVL values, and delete them now. This has
1750 // to be outside the above loop to avoid invalidating iterators.
1751 for (auto *MI : ToDelete) {
1752 if (LIS) {
1753 LIS->removeInterval(MI->getOperand(0).getReg());
1754 LIS->RemoveMachineInstrFromMaps(*MI);
1755 }
1756 MI->eraseFromParent();
1757 }
1758}
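// Illustrative before/after (invented sequence): with no vector op
// between two configs, the earlier one's state is never observed:
//   vsetvli zero, a0, e8, mf8, ta, ma   ; erased by the reverse walk
//   vsetvli zero, a0, e32, m1, ta, ma
//   vadd.vv v8, v8, v9
// NumCoalescedVSETVL is incremented for the erased instruction.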
1759
1760void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1761 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1762 MachineInstr &MI = *I++;
1763 if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
1764 Register VLOutput = MI.getOperand(1).getReg();
1765 assert(VLOutput.isVirtual());
1766 if (!MI.getOperand(1).isDead()) {
1767 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1768 TII->get(RISCV::PseudoReadVL), VLOutput);
1769 // Move the LiveInterval's definition down to PseudoReadVL.
1770 if (LIS) {
1771 SlotIndex NewDefSI =
1772 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1773 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1774 LiveRange::Segment *DefSeg = DefLI.getSegmentContaining(NewDefSI);
1775 VNInfo *DefVNI = DefLI.getVNInfoAt(DefSeg->start);
1776 DefLI.removeSegment(DefSeg->start, NewDefSI);
1777 DefVNI->def = NewDefSI;
1778 }
1779 }
1780 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1781 MI.getOperand(1).setReg(RISCV::X0);
1782 MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
1783 }
1784 }
1785}
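// Rough MIR sketch (operand details elided): a fault-only-first load
// whose VL result is live,
//   %vd, %vl = PseudoVLE32FF_V ...
// becomes
//   %vd = PseudoVLE32FF_V ..., implicit-def $vl   ; vl operand now $x0
//   %vl = PseudoReadVL
// so the trimmed VL is read from the VL CSR instead of the load's scalar
// result.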
1786
1787bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1788 // Skip if the vector extension is not enabled.
1789 ST = &MF.getSubtarget<RISCVSubtarget>();
1790 if (!ST->hasVInstructions())
1791 return false;
1792
1793 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1794
1795 TII = ST->getInstrInfo();
1796 MRI = &MF.getRegInfo();
1797 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
1798 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
1799
1800 assert(BlockInfo.empty() && "Expect empty block infos");
1801 BlockInfo.resize(MF.getNumBlockIDs());
1802
1803 bool HaveVectorOp = false;
1804
1805 // Phase 1 - determine how VL/VTYPE are affected by each block.
1806 for (const MachineBasicBlock &MBB : MF) {
1807 VSETVLIInfo TmpStatus;
1808 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1809 // Initial exit state is whatever change we found in the block.
1810 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1811 BBInfo.Exit = TmpStatus;
1812 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1813 << " is " << BBInfo.Exit << "\n");
1814
1815 }
1816
1817 // If we didn't find any instructions that need VSETVLI, we're done.
1818 if (!HaveVectorOp) {
1819 BlockInfo.clear();
1820 return false;
1821 }
1822
1823 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1824 // blocks to the list here, but will also add any that need to be revisited
1825 // during Phase 2 processing.
1826 for (const MachineBasicBlock &MBB : MF) {
1827 WorkList.push(&MBB);
1828 BlockInfo[MBB.getNumber()].InQueue = true;
1829 }
1830 while (!WorkList.empty()) {
1831 const MachineBasicBlock &MBB = *WorkList.front();
1832 WorkList.pop();
1833 computeIncomingVLVTYPE(MBB);
1834 }
1835
1836 // Perform partial redundancy elimination of vsetvli transitions.
1837 for (MachineBasicBlock &MBB : MF)
1838 doPRE(MBB);
1839
1840 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1841 // Phase 2 information to avoid adding vsetvlis before the first vector
1842 // instruction in the block if the VL/VTYPE is satisfied by its
1843 // predecessors.
1844 for (MachineBasicBlock &MBB : MF)
1845 emitVSETVLIs(MBB);
1846
1847 // Now that all vsetvlis are explicit, go through and do block-local
1848 // DSE and peephole transforms based on demanded fields. Note that
1849 // this *must* be done outside the main dataflow so long as we allow
1850 // any cross block analysis within the dataflow. We can't have both
1851 // demanded fields based mutation and non-local analysis in the
1852 // dataflow at the same time without introducing inconsistencies.
1853 // We're visiting blocks from the bottom up because a VSETVLI in an
1854 // earlier block might become dead when its uses in later blocks are
1855 // optimized away.
1856 for (MachineBasicBlock *MBB : post_order(&MF))
1857 coalesceVSETVLIs(*MBB);
1858
1859 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1860 // of VLEFF/VLSEGFF.
1861 for (MachineBasicBlock &MBB : MF)
1862 insertReadVL(MBB);
1863
1864 BlockInfo.clear();
1865 return HaveVectorOp;
1866}
1867
1868/// Returns an instance of the Insert VSETVLI pass.
1869FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1870 return new RISCVInsertVSETVLI();
1871}