//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
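// As a rough illustration (the operand layout shown here is simplified and
// the register names are hypothetical), a vector pseudo such as
//
//   %v = PseudoVADD_VV_M1 $noreg, %a, %b, %avl, 5 /* log2 of SEW=32 */, ...
//
// ends up preceded by a state change equivalent to
//
//   vsetvli x0, avl, e32, m1, ta, ma
//
// unless the dataflow analysis proves VL/VTYPE already hold compatible values
// on entry to the instruction.
//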
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
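
// Per the V spec, the 'vsetvli x0, x0, vtype' form keeps the current VL and
// only rewrites VTYPE, and it is only usable when the new VTYPE preserves
// VLMAX. For example, switching e32,m1 -> e16,mf2 keeps the SEW/LMUL ratio
// (and hence VLMAX) at 32, so any legal VL value remains unchanged.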

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
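
// For example, vle32.v always moves 32-bit elements (EEW=32) regardless of
// the SEW currently in VTYPE; only the effective EMUL = (EEW/SEW)*LMUL
// matters, so e32,m1 and e16,mf2 (both ratio 32) are equally valid states
// for it.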

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
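
// This matches the canonical expansion of 'li rd, imm' for small immediates,
// e.g. 'addi a0, x0, 8', which guarantees the AVL it defines is non-zero.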

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL |= B.LMUL;
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
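
// For example, if only Used.SEWLMULRatio is demanded (not Used.SEW or
// Used.LMUL), then e64,m2 and e32,m1 compare as compatible: both encode
// SEW/LMUL = 32, so VL is unchanged by the transition.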

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    AVLIsIgnored,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(VNInfo && AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  void setAVLIgnored() { State = AVLIsIgnored; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  bool hasAVLIgnored() const { return State == AVLIsIgnored; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(!(getAVLVNInfo()->isPHIDef() && MI));
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else if (Info.hasAVLIgnored())
      setAVLIgnored();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    if (hasAVLIgnored())
      return false;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    if (hasAVLIgnored())
      return Other.hasAVLIgnored();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)getAVLReg();
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    if (hasAVLIgnored())
      OS << "AVLIgnored";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
                                     const LiveIntervals *LIS) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS))
      NewInfo.setAVLRegDef(VNI, AVLReg);
    else {
      assert(MI.getOperand(1).isUndef());
      NewInfo.setAVLIgnored();
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
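
// Worked example: VLEN=128, SEW=32, LMUL=2 gives (128*2)/32 = 8, and the
// fractional case VLEN=128, SEW=16, LMUL=1/2 gives (128/2)/16 = 4, matching
// VLMAX = LMUL*VLEN/SEW from the V spec.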

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const LiveIntervals *LIS) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS)) {
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    } else {
      assert(VLOp.isUndef());
      InstrInfo.setAVLIgnored();
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // TODO: If we are more clever about x0,x0 insertion then we should be able
    // to deduce that the VL is ignored based off of DemandedFields, and remove
    // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
    InstrInfo.setAVLIgnored();
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg()) {
    if (const MachineInstr *DefMI = InstrInfo.getAVLDefMI(LIS);
        DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI, LIS);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLIgnored()) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      LIS->InsertMachineInstrInMaps(*MI);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(1)
                  .addImm(Info.encodeVTYPE());
    LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    LIS->InsertMachineInstrInMaps(*MI);
    LIS->createAndComputeVirtRegInterval(DestReg);
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  LIS->InsertMachineInstrInMaps(*MI);
  // Normally the AVL's live range will already extend past the inserted vsetvli
  // because the pseudos below will already use the AVL. But this isn't always
  // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
  LIS->getInterval(AVLReg).extendInBlock(
      LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, LIS));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have an
  // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
  // Since a splat is non-constant time in LMUL, we do need to be careful to not
  // increase the number of active vector registers (unlike for vmv.s.x.)
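  // For instance, a tail-undefined 'vmv.v.i v8, 5' executed with AVL=1 writes
  // only element 0, which is the effect vmv.s.x would have; that equivalence
  // is what lets us relax the demanded fields below.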
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (const MachineInstr *DefMI = Require.getAVLDefMI(LIS);
        DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
      if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
        return false;
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
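// For example (assuming a valid, known prior state), if the prior state is
// e64,m2 (ratio 32) and the instruction only cares about SEW=32, choosing m1
// keeps the ratio at 32 and therefore keeps VL unchanged, allowing the
// cheaper 'vsetvli x0, x0, vtype' form.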
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, LIS);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI, LIS);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
    SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
    VNInfo *VNI = LI.getVNInfoAt(SI);
    Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
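
// A sketch of the pattern this recognizes (block and register names are
// hypothetical):
//   bb.preheader:  %vl0 = PseudoVSETVLI %n, <vtype>
//   bb.loop:       %avl = PHI %vl0, %bb.preheader, %vl1, %bb.loop
//                  ... vector ops using %avl ...
//                  %vl1 = PseudoVSETVLI %rem, <vtype>
// If every incoming value is the VL output of a vsetvli matching that
// predecessor's exit state, re-executing a vsetvli on %avl would be a no-op.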

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          LiveInterval &LI = LIS->getInterval(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          SmallVector<MachineInstr *> DeadMIs;
          LIS->shrinkToUses(&LI, &DeadMIs);
          // We might have separate components that need split due to
          // needVSETVLIPHI causing us to skip inserting a new VL def.
          SmallVector<LiveInterval *> SplitLIs;
          LIS->splitSeparateComponents(LI, SplitLIs);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          for (MachineInstr *DeadMI : DeadMIs) {
            if (!TII->isAddImmediate(*DeadMI, Reg))
              continue;
            LIS->RemoveMachineInstrFromMaps(*DeadMI);
            DeadMI->eraseFromParent();
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
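/// As an illustrative sketch (block names hypothetical): with the CFG
///   preheader -> loop;  loop -> loop, exit
/// the 'vsetivli zero, 4, e32, m1, ta, ma' that phase 3 would emit at the top
/// of 'loop' can instead be emitted at the end of 'preheader', executing once
/// rather than on every iteration.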
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // If the AVL isn't used in its predecessors then bail, since we have no AVL
  // to insert a vsetvli with.
  if (AvailableInfo.hasAVLIgnored())
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI,
                                 const LiveIntervals *LIS) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI, LIS)
               .hasEquallyZeroAVL(getInfoForVSETVLI(MI, LIS), LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
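
// A sketch of the rewrite this enables (registers hypothetical): given
//   %x = PseudoVSETVLI %avl, <e32, m1 vtype>   ; %x unused
//   ... nothing in between demands VL or the differing VTYPE fields ...
//   %y = PseudoVSETVLI %avl, <e32, m2 vtype>
// the earlier vsetvli can adopt the later one's VTYPE (and VL def), letting
// coalesceVSETVLIs below delete the later one.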
1627
1628bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
1629 MachineInstr *NextMI = nullptr;
1630 // We can have arbitrary code in successors, so VL and VTYPE
1631 // must be considered demanded.
1632 DemandedFields Used;
1633 Used.demandVL();
1634 Used.demandVTYPE();
1636 for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1637
1638 if (!isVectorConfigInstr(MI)) {
1639 Used.doUnion(getDemanded(MI, ST));
1640 if (MI.isCall() || MI.isInlineAsm() ||
1641 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1642 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1643 NextMI = nullptr;
1644 continue;
1645 }
1646
1647 if (!MI.getOperand(0).isDead())
1648 Used.demandVL();
1649
1650 if (NextMI) {
1651 if (!Used.usedVL() && !Used.usedVTYPE()) {
1652 ToDelete.push_back(&MI);
1653 // Leave NextMI unchanged
1654 continue;
1655 }
1656
1657 if (canMutatePriorConfig(MI, *NextMI, Used, *MRI, LIS)) {
1658 if (!isVLPreservingConfig(*NextMI)) {
1659 Register DefReg = NextMI->getOperand(0).getReg();
1660
1661 MI.getOperand(0).setReg(DefReg);
1662 MI.getOperand(0).setIsDead(false);
1663
1664 // The def of DefReg moved to MI, so extend the LiveInterval up to
1665 // it.
1666 if (DefReg.isVirtual()) {
1667 LiveInterval &DefLI = LIS->getInterval(DefReg);
1668 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1669 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1670 LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1671 DefLI.addSegment(S);
1672 DefVNI->def = MISlot;
1673 // Mark DefLI as spillable if it was previously unspillable
1674 DefLI.setWeight(0);
1675
1676 // DefReg may have had no uses, in which case we need to shrink
1677 // the LiveInterval up to MI.
1678 LIS->shrinkToUses(&DefLI);
1679 }
1680
1681 Register OldVLReg;
1682 if (MI.getOperand(1).isReg())
1683 OldVLReg = MI.getOperand(1).getReg();
1684 if (NextMI->getOperand(1).isImm())
1685 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1686 else
1687 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1688
1689 // Clear NextMI's AVL early so we're not counting it as a use.
1690 if (NextMI->getOperand(1).isReg())
1691 NextMI->getOperand(1).setReg(RISCV::NoRegister);
1692
1693 if (OldVLReg && OldVLReg.isVirtual()) {
1694 // NextMI no longer uses OldVLReg so shrink its LiveInterval.
1695 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1696
1697 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1698 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1699 MRI->use_nodbg_empty(OldVLReg)) {
1700 VLOpDef->eraseFromParent();
1701 LIS->removeInterval(OldVLReg);
1702 }
1703 }
1704 MI.setDesc(NextMI->getDesc());
1705 }
1706 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1707 ToDelete.push_back(NextMI);
1708 // fallthrough
1709 }
1710 }
1711 NextMI = &MI;
1712 Used = getDemanded(MI, ST);
1713 }
1714
1715 NumCoalescedVSETVL += ToDelete.size();
1716 for (auto *MI : ToDelete) {
1717 LIS->RemoveMachineInstrFromMaps(*MI);
1718 MI->eraseFromParent();
1719 }
1720
1721 return !ToDelete.empty();
1722}
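// A hypothetical before/after for this bottom-up walk: nothing between the
// two configs below reads VL or VTYPE, so the earlier vsetvli lands in
// ToDelete and is erased at the end of the block scan:
//
//   vsetvli zero, a0, e8, m1, ta, ma    ; removed: its state is never observed
//   vsetvli zero, a1, e32, m2, ta, ma
//   vadd.vv v8, v8, v9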
1723
1724void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1725 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1726 MachineInstr &MI = *I++;
1727    if (RISCV::isFaultFirstLoad(MI)) {
1728 Register VLOutput = MI.getOperand(1).getReg();
1729 assert(VLOutput.isVirtual());
1730 if (!MI.getOperand(1).isDead()) {
1731 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1732 TII->get(RISCV::PseudoReadVL), VLOutput);
1733 // Move the LiveInterval's definition down to PseudoReadVL.
1734 SlotIndex NewDefSI =
1735 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1736 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1737 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1738 DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1739 DefVNI->def = NewDefSI;
1740 }
1741 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1742 MI.getOperand(1).setReg(RISCV::X0);
1743 }
1744 }
1745}
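// Sketch of the resulting code (operands hypothetical): a fault-only-first
// load whose trimmed VL is live gets a trailing PseudoReadVL, which later
// expands to a CSR read, while the load's own vl output is retargeted to x0:
//
//   vle32ff.v v8, (a0)   ; may take fewer elements than requested
//   csrr a1, vl          ; PseudoReadVL: recover the trimmed VL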
1746
1747bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1748 // Skip if the vector extension is not enabled.
1749  ST = &MF.getSubtarget<RISCVSubtarget>();
1750 if (!ST->hasVInstructions())
1751 return false;
1752
1753 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1754
1755 TII = ST->getInstrInfo();
1756 MRI = &MF.getRegInfo();
1757 LIS = &getAnalysis<LiveIntervals>();
1758
1759 assert(BlockInfo.empty() && "Expect empty block infos");
1760 BlockInfo.resize(MF.getNumBlockIDs());
1761
1762 bool HaveVectorOp = false;
1763
1764  // Phase 1 - determine how VL/VTYPE are affected by each block.
1765 for (const MachineBasicBlock &MBB : MF) {
1766 VSETVLIInfo TmpStatus;
1767 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1768 // Initial exit state is whatever change we found in the block.
1769 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1770 BBInfo.Exit = TmpStatus;
1771 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1772 << " is " << BBInfo.Exit << "\n");
1773
1774 }
1775
1776 // If we didn't find any instructions that need VSETVLI, we're done.
1777 if (!HaveVectorOp) {
1778 BlockInfo.clear();
1779 return false;
1780 }
1781
1782 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1783 // blocks to the list here, but will also add any that need to be revisited
1784 // during Phase 2 processing.
1785 for (const MachineBasicBlock &MBB : MF) {
1786 WorkList.push(&MBB);
1787 BlockInfo[MBB.getNumber()].InQueue = true;
1788 }
1789 while (!WorkList.empty()) {
1790 const MachineBasicBlock &MBB = *WorkList.front();
1791 WorkList.pop();
1792 computeIncomingVLVTYPE(MBB);
1793 }
1794
1795 // Perform partial redundancy elimination of vsetvli transitions.
1796 for (MachineBasicBlock &MBB : MF)
1797 doPRE(MBB);
1798
1799 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1800 // Phase 2 information to avoid adding vsetvlis before the first vector
1801 // instruction in the block if the VL/VTYPE is satisfied by its
1802 // predecessors.
1803 for (MachineBasicBlock &MBB : MF)
1804 emitVSETVLIs(MBB);
1805
1806 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1807 // of VLEFF/VLSEGFF.
1808 for (MachineBasicBlock &MBB : MF)
1809 insertReadVL(MBB);
1810
1811 BlockInfo.clear();
1812 return HaveVectorOp;
1813}
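// End-to-end sketch on a hypothetical CFG: phase 1 records that entry exits
// in state (e32, m1, VL=a0), phase 2 propagates that state to loop's entry,
// and phase 3 therefore emits no vsetvli inside the loop:
//
//   entry: vsetvli zero, a0, e32, m1, ta, ma
//          ...
//   loop:  vadd.vv v8, v8, v9    ; predecessor state already satisfies this
//          bnez a2, loop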
1814
1815/// Returns an instance of the Insert VSETVLI pass.
1816FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1817 return new RISCVInsertVSETVLI();
1818}
1819
1820// Now that all vsetvlis are explicit, go through and do block local
1821// DSE and peephole based demanded fields based transforms. Note that
1822// this *must* be done outside the main dataflow so long as we allow
1823// any cross block analysis within the dataflow. We can't have both
1824// demanded fields based mutation and non-local analysis in the
1825// dataflow at the same time without introducing inconsistencies.
1826bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1827 // Skip if the vector extension is not enabled.
1828 ST = &MF.getSubtarget<RISCVSubtarget>();
1829 if (!ST->hasVInstructions())
1830 return false;
1831 TII = ST->getInstrInfo();
1832 MRI = &MF.getRegInfo();
1833 LIS = &getAnalysis<LiveIntervals>();
1834
1835 bool Changed = false;
1836 for (MachineBasicBlock &MBB : MF)
1837 Changed |= coalesceVSETVLIs(MBB);
1838
1839 return Changed;
1840}
1841
1842FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
1843 return new RISCVCoalesceVSETVLI();
1844}