RISCVInsertVSETVLI.cpp
1//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a function pass that inserts VSETVLI instructions where
10// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11// instructions.
12//
13// This pass consists of 3 phases:
14//
15// Phase 1 collects how each basic block affects VL/VTYPE.
16//
17// Phase 2 uses the information from phase 1 to do a data flow analysis to
18// propagate the VL/VTYPE changes through the function. This gives us the
19// VL/VTYPE at the start of each basic block.
20//
21// Phase 3 inserts VSETVLI instructions in each basic block. Information from
22// phase 2 is used to prevent inserting a VSETVLI before the first vector
23// instruction in the block if possible.
24//
25//===----------------------------------------------------------------------===//
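// Editorial sketch (not part of the upstream file): for a single-block loop
// whose body contains one e32/m1 vector add, the phases cooperate roughly as
// follows:
//
//   loop:
//     vsetvli x0, a0, e32, m1, ta, ma   ; inserted by phase 3
//     vadd.vv v8, v8, v9                ; demands SEW=32, LMUL=1
//     bnez a1, loop
//
// Phase 1 records that the block exits in the e32/m1 state, phase 2
// propagates that state around the back edge, and phase 3 (helped by doPRE
// below) can then prove the loop body needs no further vsetvli.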
26
27#include "RISCV.h"
28#include "RISCVSubtarget.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/CodeGen/LiveIntervals.h"
31#include "llvm/CodeGen/MachineFunctionPass.h"
32#include <queue>
33using namespace llvm;
34
35#define DEBUG_TYPE "riscv-insert-vsetvli"
36#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
37
38STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
39STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
40
42 "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
43 cl::desc("Disable looking through phis when inserting vsetvlis."));
44
46 "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
47 cl::desc("Enable strict assertion checking for the dataflow algorithm"));
48
49namespace {
50
51static unsigned getVLOpNum(const MachineInstr &MI) {
52 return RISCVII::getVLOpNum(MI.getDesc());
53}
54
55static unsigned getSEWOpNum(const MachineInstr &MI) {
56 return RISCVII::getSEWOpNum(MI.getDesc());
57}
58
59static bool isVectorConfigInstr(const MachineInstr &MI) {
60 return MI.getOpcode() == RISCV::PseudoVSETVLI ||
61 MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
62 MI.getOpcode() == RISCV::PseudoVSETIVLI;
63}
64
65/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
66/// VL and only sets VTYPE.
67static bool isVLPreservingConfig(const MachineInstr &MI) {
68 if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
69 return false;
70 assert(RISCV::X0 == MI.getOperand(1).getReg());
71 return RISCV::X0 == MI.getOperand(0).getReg();
72}
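// Editorial note: e.g. "vsetvli x0, x0, e32, m1, ta, ma" keeps the current VL
// and only changes VTYPE (the encoding is reserved if the new SEW/LMUL ratio
// would change VLMAX), whereas "vsetvli a0, x0, e32, m1, ta, ma" sets VL to
// VLMAX and writes it to a0. Only the former is VL-preserving.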
73
74static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
75 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
76 default:
77 return false;
78 case RISCV::VFMV_S_F:
79 case RISCV::VFMV_V_F:
80 return true;
81 }
82}
83
84static bool isScalarExtractInstr(const MachineInstr &MI) {
85 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
86 default:
87 return false;
88 case RISCV::VMV_X_S:
89 case RISCV::VFMV_F_S:
90 return true;
91 }
92}
93
94static bool isScalarInsertInstr(const MachineInstr &MI) {
95 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
96 default:
97 return false;
98 case RISCV::VMV_S_X:
99 case RISCV::VFMV_S_F:
100 return true;
101 }
102}
103
104static bool isScalarSplatInstr(const MachineInstr &MI) {
105 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
106 default:
107 return false;
108 case RISCV::VMV_V_I:
109 case RISCV::VMV_V_X:
110 case RISCV::VFMV_V_F:
111 return true;
112 }
113}
114
115static bool isVSlideInstr(const MachineInstr &MI) {
116 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
117 default:
118 return false;
119 case RISCV::VSLIDEDOWN_VX:
120 case RISCV::VSLIDEDOWN_VI:
121 case RISCV::VSLIDEUP_VX:
122 case RISCV::VSLIDEUP_VI:
123 return true;
124 }
125}
126
127/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
128/// not a load or store which ignores SEW.
129static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
130 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
131 default:
132 return std::nullopt;
133 case RISCV::VLE8_V:
134 case RISCV::VLSE8_V:
135 case RISCV::VSE8_V:
136 case RISCV::VSSE8_V:
137 return 8;
138 case RISCV::VLE16_V:
139 case RISCV::VLSE16_V:
140 case RISCV::VSE16_V:
141 case RISCV::VSSE16_V:
142 return 16;
143 case RISCV::VLE32_V:
144 case RISCV::VLSE32_V:
145 case RISCV::VSE32_V:
146 case RISCV::VSSE32_V:
147 return 32;
148 case RISCV::VLE64_V:
149 case RISCV::VLSE64_V:
150 case RISCV::VSE64_V:
151 case RISCV::VSSE64_V:
152 return 64;
153 }
154}
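// Worked example (editorial): vle32.v encodes EEW=32 in the opcode, so it
// behaves identically under e32/m1 and e8/mf4; in both cases the effective
// EMUL = (EEW/SEW)*LMUL is m1 (32/32*1 and 32/8*1/4). Only the SEW/LMUL
// ratio matters, which getDemanded below exploits.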
155
156static bool isNonZeroLoadImmediate(MachineInstr &MI) {
157 return MI.getOpcode() == RISCV::ADDI &&
158 MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
159 MI.getOperand(1).getReg() == RISCV::X0 &&
160 MI.getOperand(2).getImm() != 0;
161}
162
163/// Return true if this is an operation on mask registers. Note that
164/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
165static bool isMaskRegOp(const MachineInstr &MI) {
166 if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
167 return false;
168 const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
169 // A Log2SEW of 0 is an operation on mask registers only.
170 return Log2SEW == 0;
171}
172
173/// Return true if the inactive elements in the result are entirely undefined.
174/// Note that this is different from "agnostic" as defined by the vector
175/// specification. Agnostic requires each lane to either be undisturbed, or
176/// take the value -1; no other value is allowed.
177static bool hasUndefinedMergeOp(const MachineInstr &MI,
178 const MachineRegisterInfo &MRI) {
179
180 unsigned UseOpIdx;
181 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
182 // If there is no passthrough operand, then the pass through
183 // lanes are undefined.
184 return true;
185
186 // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
187 // operands are solely IMPLICIT_DEFS, then the pass through lanes are
188 // undefined.
189 const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
190 if (UseMO.getReg() == RISCV::NoRegister)
191 return true;
192
193 if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
194 if (UseMI->isImplicitDef())
195 return true;
196
197 if (UseMI->isRegSequence()) {
198 for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
199 MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
200 if (!SourceMI || !SourceMI->isImplicitDef())
201 return false;
202 }
203 return true;
204 }
205 }
206 return false;
207}
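// Editorial sketch of the operand shape being tested, in rough MIR notation:
//   %d = PseudoVADD_VV_M1 %passthru(tied-def 0), %a, %b, %avl, sew, policy
// If %passthru comes from IMPLICIT_DEF (or is NoRegister), the inactive
// lanes may legally hold anything after the operation.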
208
209/// Which subfields of VL or VTYPE have values we need to preserve?
210struct DemandedFields {
211 // Some unknown property of VL is used. If demanded, must preserve entire
212 // value.
213 bool VLAny = false;
214 // Only zero vs non-zero is used. If demanded, can change non-zero values.
215 bool VLZeroness = false;
216 // What properties of SEW we need to preserve.
217 enum : uint8_t {
218 SEWEqual = 3, // The exact value of SEW needs to be preserved.
219 SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
220 // than or equal to the original value.
221 SEWGreaterThanOrEqualAndLessThan64 =
222 1, // SEW can be changed as long as it's greater
223 // than or equal to the original value, but must be less
224 // than 64.
225 SEWNone = 0 // We don't need to preserve SEW at all.
226 } SEW = SEWNone;
227 bool LMUL = false;
228 bool SEWLMULRatio = false;
229 bool TailPolicy = false;
230 bool MaskPolicy = false;
231
232 // Return true if any part of VTYPE was used
233 bool usedVTYPE() const {
234 return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
235 }
236
237 // Return true if any property of VL was used
238 bool usedVL() {
239 return VLAny || VLZeroness;
240 }
241
242 // Mark all VTYPE subfields and properties as demanded
243 void demandVTYPE() {
244 SEW = SEWEqual;
245 LMUL = true;
246 SEWLMULRatio = true;
247 TailPolicy = true;
248 MaskPolicy = true;
249 }
250
251 // Mark all VL properties as demanded
252 void demandVL() {
253 VLAny = true;
254 VLZeroness = true;
255 }
256
257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
258 /// Support for debugging, callable in GDB: V->dump()
259 LLVM_DUMP_METHOD void dump() const {
260 print(dbgs());
261 dbgs() << "\n";
262 }
263
264 /// Implement operator<<.
265 void print(raw_ostream &OS) const {
266 OS << "{";
267 OS << "VLAny=" << VLAny << ", ";
268 OS << "VLZeroness=" << VLZeroness << ", ";
269 OS << "SEW=";
270 switch (SEW) {
271 case SEWEqual:
272 OS << "SEWEqual";
273 break;
274 case SEWGreaterThanOrEqual:
275 OS << "SEWGreaterThanOrEqual";
276 break;
277 case SEWGreaterThanOrEqualAndLessThan64:
278 OS << "SEWGreaterThanOrEqualAndLessThan64";
279 break;
280 case SEWNone:
281 OS << "SEWNone";
282 break;
283 };
284 OS << ", ";
285 OS << "LMUL=" << LMUL << ", ";
286 OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
287 OS << "TailPolicy=" << TailPolicy << ", ";
288 OS << "MaskPolicy=" << MaskPolicy;
289 OS << "}";
290 }
291#endif
292};
293
294#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
295LLVM_ATTRIBUTE_USED
296inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
297 DF.print(OS);
298 return OS;
299}
300#endif
301
302/// Return true if moving from CurVType to NewVType is
303/// indistinguishable from the perspective of an instruction (or set
304/// of instructions) which use only the Used subfields and properties.
305static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
306 const DemandedFields &Used) {
307 switch (Used.SEW) {
308 case DemandedFields::SEWNone:
309 break;
310 case DemandedFields::SEWEqual:
311 if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
312 return false;
313 break;
314 case DemandedFields::SEWGreaterThanOrEqual:
315 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
316 return false;
317 break;
318 case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
319 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
320 RISCVVType::getSEW(NewVType) >= 64)
321 return false;
322 break;
323 }
324
325 if (Used.LMUL &&
326 RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
327 return false;
328
329 if (Used.SEWLMULRatio) {
330 auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
331 RISCVVType::getVLMUL(CurVType));
332 auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
333 RISCVVType::getVLMUL(NewVType));
334 if (Ratio1 != Ratio2)
335 return false;
336 }
337
338 if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
339 RISCVVType::isTailAgnostic(NewVType))
340 return false;
341 if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
342 RISCVVType::isMaskAgnostic(NewVType))
343 return false;
344 return true;
345}
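// Worked example (editorial): if Used demands only SEWLMULRatio, then
// e32/m1 (ratio 32/1 = 32) and e16/mf2 (ratio 16/(1/2) = 32) are compatible,
// while e32/m1 and e32/m2 (ratio 16) are not.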
346
347/// Return the fields and properties demanded by the provided instruction.
348DemandedFields getDemanded(const MachineInstr &MI,
349 const MachineRegisterInfo *MRI,
350 const RISCVSubtarget *ST) {
351 // Warning: This function has to work on both the lowered (i.e. post
352 // emitVSETVLIs) and pre-lowering forms. The main implication of this is
353 // that it can't use the value of a SEW, VL, or Policy operand as they might
354 // be stale after lowering.
355
356 // Most instructions don't use any of these subfields.
357 DemandedFields Res;
358 // Start conservative if registers are used
359 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
360 Res.demandVL();
361 if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
362 Res.demandVTYPE();
363 // Start conservative on the unlowered form too
364 uint64_t TSFlags = MI.getDesc().TSFlags;
365 if (RISCVII::hasSEWOp(TSFlags)) {
366 Res.demandVTYPE();
367 if (RISCVII::hasVLOp(TSFlags))
368 Res.demandVL();
369
370 // Behavior is independent of mask policy.
371 if (!RISCVII::usesMaskPolicy(TSFlags))
372 Res.MaskPolicy = false;
373 }
374
375 // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
376 // They instead demand the ratio of the two which is used in computing
377 // EMUL, but which allows us the flexibility to change SEW and LMUL
378 // provided we don't change the ratio.
379 // Note: We assume that the instruction's initial SEW is the EEW encoded
380 // in the opcode. This is asserted when constructing the VSETVLIInfo.
381 if (getEEWForLoadStore(MI)) {
382 Res.SEW = DemandedFields::SEWNone;
383 Res.LMUL = false;
384 }
385
386 // Store instructions don't use the policy fields.
387 if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
388 Res.TailPolicy = false;
389 Res.MaskPolicy = false;
390 }
391
392 // If this is a mask reg operation, it only cares about VLMAX.
393 // TODO: Possible extensions to this logic
394 // * Probably ok if available VLMax is larger than demanded
395 // * The policy bits can probably be ignored.
396 if (isMaskRegOp(MI)) {
397 Res.SEW = DemandedFields::SEWNone;
398 Res.LMUL = false;
399 }
400
401 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
402 if (isScalarInsertInstr(MI)) {
403 Res.LMUL = false;
404 Res.SEWLMULRatio = false;
405 Res.VLAny = false;
406 // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
407 // need to preserve any other bits and are thus compatible with any larger
408 // etype, and can disregard policy bits. Warning: It's tempting to try doing
409 // this for any tail agnostic operation, but we can't as TA requires
410 // tail lanes to either be the original value or -1. We are writing
411 // unknown bits to the lanes here.
412 if (hasUndefinedMergeOp(MI, *MRI)) {
413 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
414 Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
415 else
416 Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
417 Res.TailPolicy = false;
418 }
419 }
420
421 // vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
422 if (isScalarExtractInstr(MI)) {
423 assert(!RISCVII::hasVLOp(TSFlags));
424 Res.LMUL = false;
425 Res.SEWLMULRatio = false;
426 Res.TailPolicy = false;
427 Res.MaskPolicy = false;
428 }
429
430 return Res;
431}
432
433/// Defines the abstract state with which the forward dataflow models the
434/// values of the VL and VTYPE registers after insertion.
435class VSETVLIInfo {
436 union {
437 Register AVLReg;
438 unsigned AVLImm;
439 };
440
441 enum : uint8_t {
442 Uninitialized,
443 AVLIsReg,
444 AVLIsImm,
445 Unknown,
446 } State = Uninitialized;
447
448 // Fields from VTYPE.
449 RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
450 uint8_t SEW = 0;
451 uint8_t TailAgnostic : 1;
452 uint8_t MaskAgnostic : 1;
453 uint8_t SEWLMULRatioOnly : 1;
454
455public:
456 VSETVLIInfo()
457 : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
458 SEWLMULRatioOnly(false) {}
459
460 static VSETVLIInfo getUnknown() {
461 VSETVLIInfo Info;
462 Info.setUnknown();
463 return Info;
464 }
465
466 bool isValid() const { return State != Uninitialized; }
467 void setUnknown() { State = Unknown; }
468 bool isUnknown() const { return State == Unknown; }
469
470 void setAVLReg(Register Reg) {
471 assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister);
472 AVLReg = Reg;
473 State = AVLIsReg;
474 }
475
476 void setAVLImm(unsigned Imm) {
477 AVLImm = Imm;
478 State = AVLIsImm;
479 }
480
481 bool hasAVLImm() const { return State == AVLIsImm; }
482 bool hasAVLReg() const { return State == AVLIsReg; }
483 Register getAVLReg() const {
484 assert(hasAVLReg());
485 return AVLReg;
486 }
487 unsigned getAVLImm() const {
488 assert(hasAVLImm());
489 return AVLImm;
490 }
491
492 void setAVL(VSETVLIInfo Info) {
493 assert(Info.isValid());
494 if (Info.isUnknown())
495 setUnknown();
496 else if (Info.hasAVLReg())
497 setAVLReg(Info.getAVLReg());
498 else {
499 assert(Info.hasAVLImm());
500 setAVLImm(Info.getAVLImm());
501 }
502 }
503
504 unsigned getSEW() const { return SEW; }
505 RISCVII::VLMUL getVLMUL() const { return VLMul; }
506 bool getTailAgnostic() const { return TailAgnostic; }
507 bool getMaskAgnostic() const { return MaskAgnostic; }
508
509 bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
510 if (hasAVLImm())
511 return getAVLImm() > 0;
512 if (hasAVLReg()) {
513 if (getAVLReg() == RISCV::X0)
514 return true;
515 if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
516 MI && isNonZeroLoadImmediate(*MI))
517 return true;
518 return false;
519 }
520 return false;
521 }
522
523 bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
524 const MachineRegisterInfo &MRI) const {
525 if (hasSameAVL(Other))
526 return true;
527 return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
528 }
529
530 bool hasSameAVL(const VSETVLIInfo &Other) const {
531 if (hasAVLReg() && Other.hasAVLReg())
532 return getAVLReg() == Other.getAVLReg();
533
534 if (hasAVLImm() && Other.hasAVLImm())
535 return getAVLImm() == Other.getAVLImm();
536
537 return false;
538 }
539
540 void setVTYPE(unsigned VType) {
541 assert(isValid() && !isUnknown() &&
542 "Can't set VTYPE for uninitialized or unknown");
543 VLMul = RISCVVType::getVLMUL(VType);
544 SEW = RISCVVType::getSEW(VType);
545 TailAgnostic = RISCVVType::isTailAgnostic(VType);
546 MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
547 }
548 void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
549 assert(isValid() && !isUnknown() &&
550 "Can't set VTYPE for uninitialized or unknown");
551 VLMul = L;
552 SEW = S;
553 TailAgnostic = TA;
554 MaskAgnostic = MA;
555 }
556
557 void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
558
559 unsigned encodeVTYPE() const {
560 assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
561 "Can't encode VTYPE for uninitialized or unknown");
562 return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
563 }
564
565 bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
566
567 bool hasSameVTYPE(const VSETVLIInfo &Other) const {
568 assert(isValid() && Other.isValid() &&
569 "Can't compare invalid VSETVLIInfos");
570 assert(!isUnknown() && !Other.isUnknown() &&
571 "Can't compare VTYPE in unknown state");
572 assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
573 "Can't compare when only LMUL/SEW ratio is valid.");
574 return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
575 std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
576 Other.MaskAgnostic);
577 }
578
579 unsigned getSEWLMULRatio() const {
580 assert(isValid() && !isUnknown() &&
581 "Can't use VTYPE for uninitialized or unknown");
582 return RISCVVType::getSEWLMULRatio(SEW, VLMul);
583 }
584
585 // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
586 // Note that having the same VLMAX ensures that both share the same
587 // function from AVL to VL; that is, they must produce the same VL value
588 // for any given AVL value.
589 bool hasSameVLMAX(const VSETVLIInfo &Other) const {
590 assert(isValid() && Other.isValid() &&
591 "Can't compare invalid VSETVLIInfos");
592 assert(!isUnknown() && !Other.isUnknown() &&
593 "Can't compare VTYPE in unknown state");
594 return getSEWLMULRatio() == Other.getSEWLMULRatio();
595 }
596
597 bool hasCompatibleVTYPE(const DemandedFields &Used,
598 const VSETVLIInfo &Require) const {
599 return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
600 }
601
602 // Determine whether the vector instructions requirements represented by
603 // Require are compatible with the previous vsetvli instruction represented
604 // by this. MI is the instruction whose requirements we're considering.
605 bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
606 const MachineRegisterInfo &MRI) const {
607 assert(isValid() && Require.isValid() &&
608 "Can't compare invalid VSETVLIInfos");
609 assert(!Require.SEWLMULRatioOnly &&
610 "Expected a valid VTYPE for instruction!");
611 // Nothing is compatible with Unknown.
612 if (isUnknown() || Require.isUnknown())
613 return false;
614
615 // If only our VLMAX ratio is valid, then this isn't compatible.
616 if (SEWLMULRatioOnly)
617 return false;
618
619 if (Used.VLAny && !hasSameAVL(Require))
620 return false;
621
622 if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
623 return false;
624
625 return hasCompatibleVTYPE(Used, Require);
626 }
627
628 bool operator==(const VSETVLIInfo &Other) const {
629 // Uninitialized is only equal to another Uninitialized.
630 if (!isValid())
631 return !Other.isValid();
632 if (!Other.isValid())
633 return !isValid();
634
635 // Unknown is only equal to another Unknown.
636 if (isUnknown())
637 return Other.isUnknown();
638 if (Other.isUnknown())
639 return isUnknown();
640
641 if (!hasSameAVL(Other))
642 return false;
643
644 // If the SEWLMULRatioOnly bits are different, then they aren't equal.
645 if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
646 return false;
647
648 // If only the VLMAX is valid, check that it is the same.
649 if (SEWLMULRatioOnly)
650 return hasSameVLMAX(Other);
651
652 // If the full VTYPE is valid, check that it is the same.
653 return hasSameVTYPE(Other);
654 }
655
656 bool operator!=(const VSETVLIInfo &Other) const {
657 return !(*this == Other);
658 }
659
660 // Calculate the VSETVLIInfo visible to a block assuming this and Other are
661 // both predecessors.
662 VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
663 // If the new value isn't valid, ignore it.
664 if (!Other.isValid())
665 return *this;
666
667 // If this value isn't valid, this must be the first predecessor, use it.
668 if (!isValid())
669 return Other;
670
671 // If either is unknown, the result is unknown.
672 if (isUnknown() || Other.isUnknown())
673 return VSETVLIInfo::getUnknown();
674
675 // If we have an exact match, return this.
676 if (*this == Other)
677 return *this;
678
679 // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
680 // return an SEW/LMUL ratio only value.
681 if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
682 VSETVLIInfo MergeInfo = *this;
683 MergeInfo.SEWLMULRatioOnly = true;
684 return MergeInfo;
685 }
686
687 // Otherwise the result is unknown.
688 return VSETVLIInfo::getUnknown();
689 }
690
691#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
692 /// Support for debugging, callable in GDB: V->dump()
693 LLVM_DUMP_METHOD void dump() const {
694 print(dbgs());
695 dbgs() << "\n";
696 }
697
698 /// Implement operator<<.
699 /// @{
700 void print(raw_ostream &OS) const {
701 OS << "{";
702 if (!isValid())
703 OS << "Uninitialized";
704 if (isUnknown())
705 OS << "unknown";
706 if (hasAVLReg())
707 OS << "AVLReg=" << (unsigned)AVLReg;
708 if (hasAVLImm())
709 OS << "AVLImm=" << (unsigned)AVLImm;
710 OS << ", "
711 << "VLMul=" << (unsigned)VLMul << ", "
712 << "SEW=" << (unsigned)SEW << ", "
713 << "TailAgnostic=" << (bool)TailAgnostic << ", "
714 << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
715 << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
716 }
717#endif
718};
719
720#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
721LLVM_ATTRIBUTE_USED
722inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
723 V.print(OS);
724 return OS;
725}
726#endif
727
728struct BlockData {
729 // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
730 // block. Calculated in Phase 2.
731 VSETVLIInfo Exit;
732
733 // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
734 // blocks. Calculated in Phase 2, and used by Phase 3.
735 VSETVLIInfo Pred;
736
737 // Keeps track of whether the block is already in the queue.
738 bool InQueue = false;
739
740 BlockData() = default;
741};
742
743class RISCVInsertVSETVLI : public MachineFunctionPass {
744 const RISCVSubtarget *ST;
745 const TargetInstrInfo *TII;
746 MachineRegisterInfo *MRI;
747
748 std::vector<BlockData> BlockInfo;
749 std::queue<const MachineBasicBlock *> WorkList;
750
751public:
752 static char ID;
753
754 RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
755 bool runOnMachineFunction(MachineFunction &MF) override;
756
757 void getAnalysisUsage(AnalysisUsage &AU) const override {
758 AU.setPreservesCFG();
759 MachineFunctionPass::getAnalysisUsage(AU);
760 }
761
762 StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
763
764private:
765 bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
766 const VSETVLIInfo &CurInfo) const;
767 bool needVSETVLIPHI(const VSETVLIInfo &Require,
768 const MachineBasicBlock &MBB) const;
769 void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
770 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
771 void insertVSETVLI(MachineBasicBlock &MBB,
772 MachineBasicBlock::iterator InsertPt, DebugLoc DL,
773 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
774
775 void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
776 void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
777 bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
778 VSETVLIInfo &Info) const;
779 void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
780 void emitVSETVLIs(MachineBasicBlock &MBB);
781 void doLocalPostpass(MachineBasicBlock &MBB);
782 void doPRE(MachineBasicBlock &MBB);
783 void insertReadVL(MachineBasicBlock &MBB);
784};
785
786} // end anonymous namespace
787
788char RISCVInsertVSETVLI::ID = 0;
789
790INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
791 false, false)
792
793// Return a VSETVLIInfo representing the changes made by this VSETVLI or
794// VSETIVLI instruction.
795static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
796 VSETVLIInfo NewInfo;
797 if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
798 NewInfo.setAVLImm(MI.getOperand(1).getImm());
799 } else {
800 assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
801 MI.getOpcode() == RISCV::PseudoVSETVLIX0);
802 Register AVLReg = MI.getOperand(1).getReg();
803 assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
804 "Can't handle X0, X0 vsetvli yet");
805 NewInfo.setAVLReg(AVLReg);
806 }
807 NewInfo.setVTYPE(MI.getOperand(2).getImm());
808
809 return NewInfo;
810}
811
812static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
813 RISCVII::VLMUL VLMul) {
814 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
815 if (Fractional)
816 VLEN = VLEN / LMul;
817 else
818 VLEN = VLEN * LMul;
819 return VLEN/SEW;
820}
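// Worked example (editorial): with VLEN=128, SEW=32, LMUL=1/2 this returns
// (128/2)/32 = 2; with LMUL=2 it returns (128*2)/32 = 8.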
821
822static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
823 const RISCVSubtarget &ST,
824 const MachineRegisterInfo *MRI) {
825 VSETVLIInfo InstrInfo;
826
827 bool TailAgnostic = true;
828 bool MaskAgnostic = true;
829 if (!hasUndefinedMergeOp(MI, *MRI)) {
830 // Start with undisturbed.
831 TailAgnostic = false;
832 MaskAgnostic = false;
833
834 // If there is a policy operand, use it.
835 if (RISCVII::hasVecPolicyOp(TSFlags)) {
836 const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
837 uint64_t Policy = Op.getImm();
838 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
839 "Invalid Policy Value");
840 TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
841 MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
842 }
843
844 // Some pseudo instructions force a tail agnostic policy despite having a
845 // tied def.
846 if (RISCVII::doesForceTailAgnostic(TSFlags))
847 TailAgnostic = true;
848
849 if (!RISCVII::usesMaskPolicy(TSFlags))
850 MaskAgnostic = true;
851 }
852
853 RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
854
855 unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
856 // A Log2SEW of 0 is an operation on mask registers only.
857 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
858 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
859
860 if (RISCVII::hasVLOp(TSFlags)) {
861 const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
862 if (VLOp.isImm()) {
863 int64_t Imm = VLOp.getImm();
864 // Convert the VLMax sentinel to the X0 register.
865 if (Imm == RISCV::VLMaxSentinel) {
866 // If we know the exact VLEN, see if we can use the constant encoding
867 // for the VLMAX instead. This reduces register pressure slightly.
868 const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
869 if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
870 InstrInfo.setAVLImm(VLMAX);
871 else
872 InstrInfo.setAVLReg(RISCV::X0);
873 }
874 else
875 InstrInfo.setAVLImm(Imm);
876 } else {
877 InstrInfo.setAVLReg(VLOp.getReg());
878 }
879 } else {
880 assert(isScalarExtractInstr(MI));
881 InstrInfo.setAVLReg(RISCV::NoRegister);
882 }
883#ifndef NDEBUG
884 if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
885 assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
886 }
887#endif
888 InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
889
890 // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
891 // AVL operand with the AVL of the defining vsetvli. We avoid general
892 // register AVLs to avoid extending live ranges without being sure we can
893 // kill the original source reg entirely.
894 if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
895 MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
896 if (DefMI && isVectorConfigInstr(*DefMI)) {
897 VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
898 if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
899 (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
900 InstrInfo.setAVL(DefInstrInfo);
901 }
902 }
903 }
904
905 return InstrInfo;
906}
907
908void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
909 const VSETVLIInfo &Info,
910 const VSETVLIInfo &PrevInfo) {
911 DebugLoc DL = MI.getDebugLoc();
912 insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
913}
914
915void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
916 MachineBasicBlock::iterator InsertPt, DebugLoc DL,
917 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
918
919 ++NumInsertedVSETVL;
920 if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
921 // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
922 // VLMAX.
923 if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
924 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
925 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
926 .addReg(RISCV::X0, RegState::Kill)
927 .addImm(Info.encodeVTYPE())
928 .addReg(RISCV::VL, RegState::Implicit);
929 return;
930 }
931
932 // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
933 // it has the same VLMAX we want and the last VL/VTYPE we observed is the
934 // same, we can use the X0, X0 form.
935 if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
936 Info.getAVLReg().isVirtual()) {
937 if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
938 if (isVectorConfigInstr(*DefMI)) {
939 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
940 if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
941 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
942 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
943 .addReg(RISCV::X0, RegState::Kill)
944 .addImm(Info.encodeVTYPE())
945 .addReg(RISCV::VL, RegState::Implicit);
946 return;
947 }
948 }
949 }
950 }
951 }
952
953 if (Info.hasAVLImm()) {
954 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
955 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
956 .addImm(Info.getAVLImm())
957 .addImm(Info.encodeVTYPE());
958 return;
959 }
960
961 Register AVLReg = Info.getAVLReg();
962 if (AVLReg == RISCV::NoRegister) {
963 // We can only use x0, x0 if there's no chance of the vtype change causing
964 // the previous vl to become invalid.
965 if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
966 Info.hasSameVLMAX(PrevInfo)) {
967 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
968 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
969 .addReg(RISCV::X0, RegState::Kill)
970 .addImm(Info.encodeVTYPE())
971 .addReg(RISCV::VL, RegState::Implicit);
972 return;
973 }
974 // Otherwise use an AVL of 1 to avoid depending on previous vl.
975 BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
976 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
977 .addImm(1)
978 .addImm(Info.encodeVTYPE());
979 return;
980 }
981
982 if (AVLReg.isVirtual())
983 MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
984
985 // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
986 // opcode if the AVLReg is X0 as they have different register classes for
987 // the AVL operand.
988 Register DestReg = RISCV::X0;
989 unsigned Opcode = RISCV::PseudoVSETVLI;
990 if (AVLReg == RISCV::X0) {
991 DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
992 Opcode = RISCV::PseudoVSETVLIX0;
993 }
994 BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
995 .addReg(DestReg, RegState::Define | RegState::Dead)
996 .addReg(AVLReg)
997 .addImm(Info.encodeVTYPE());
998}
999
1000static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
1001 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
1002 return Fractional || LMul == 1;
1003}
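// Editorial note: m1 and the fractional LMULs (mf2, mf4, mf8) return true;
// m2, m4 and m8 return false.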
1004
1005/// Return true if a VSETVLI is required to transition from CurInfo to Require
1006/// before MI.
1007bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
1008 const VSETVLIInfo &Require,
1009 const VSETVLIInfo &CurInfo) const {
1010 assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
1011
1012 if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
1013 return true;
1014
1015 DemandedFields Used = getDemanded(MI, MRI, ST);
1016
1017 // A slidedown/slideup with an *undefined* merge op can freely clobber
1018 // elements not copied from the source vector (e.g. masked off, tail, or
1019 // slideup's prefix). Notes:
1020 // * We can't modify SEW here since the slide amount is in units of SEW.
1021 // * VL=1 is special only because we have existing support for zero vs
1022 // non-zero VL. We could generalize this if we had a VL > C predicate.
1023 // * The LMUL1 restriction is for machines whose latency may depend on VL.
1024 // * As above, this is only legal for tail "undefined" not "agnostic".
1025 if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1026 isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1027 Used.VLAny = false;
1028 Used.VLZeroness = true;
1029 Used.LMUL = false;
1030 Used.TailPolicy = false;
1031 }
1032
1033 // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
1034 // semantically as vmv.s.x. This is particularly useful since we don't have an
1035 // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
1036 // Since a splat is non-constant time in LMUL, we do need to be careful not to
1037 // increase the number of active vector registers (unlike for vmv.s.x).
1038 if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
1039 isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
1040 Used.LMUL = false;
1041 Used.SEWLMULRatio = false;
1042 Used.VLAny = false;
1043 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
1044 Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
1045 else
1046 Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
1047 Used.TailPolicy = false;
1048 }
1049
1050 if (CurInfo.isCompatible(Used, Require, *MRI))
1051 return false;
1052
1053 // We didn't find a compatible value. If our AVL is a virtual register,
1054 // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
1055 // and the last VL/VTYPE we observed is the same, we don't need a
1056 // VSETVLI here.
1057 if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
1058 CurInfo.hasCompatibleVTYPE(Used, Require)) {
1059 if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
1060 if (isVectorConfigInstr(*DefMI)) {
1061 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1062 if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
1063 return false;
1064 }
1065 }
1066 }
1067
1068 return true;
1069}
1070
1071// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1072// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1073// places.
1074static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
1075 DemandedFields &Demanded) {
1076 VSETVLIInfo Info = NewInfo;
1077
1078 if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
1079 !PrevInfo.isUnknown()) {
1080 if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1081 PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1082 Info.setVLMul(*NewVLMul);
1083 Demanded.LMUL = true;
1084 }
1085
1086 return Info;
1087}
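// Worked example (editorial): if the previous state is e32/m1 (ratio 32) and
// the new instruction demands only SEW=64, getSameRatioLMUL yields m2, so the
// adjusted state e64/m2 keeps the ratio (64/2 = 32) and hence VLMAX.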
1088
1089// Given an incoming state reaching MI, minimally modifies that state so that it
1090// is compatible with MI. The resulting state is guaranteed to be semantically
1091// legal for MI, but may not be the state requested by MI.
1092void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
1093 const MachineInstr &MI) const {
1094 uint64_t TSFlags = MI.getDesc().TSFlags;
1095 if (!RISCVII::hasSEWOp(TSFlags))
1096 return;
1097
1098 const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
1099 assert(NewInfo.isValid() && !NewInfo.isUnknown());
1100 if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
1101 return;
1102
1103 const VSETVLIInfo PrevInfo = Info;
1104 if (!Info.isValid() || Info.isUnknown())
1105 Info = NewInfo;
1106
1107 DemandedFields Demanded = getDemanded(MI, MRI, ST);
1108 const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
1109
1110 // If MI only demands that VL has the same zeroness, we only need to set the
1111 // AVL if the zeroness differs. This removes a vsetvli entirely if the types
1112 // match or allows use of cheaper avl preserving variant if VLMAX doesn't
1113 // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
1114 // variant, so we avoid the transform to prevent extending the live range of
1115 // an AVL register operand.
1116 // TODO: We can probably relax this for immediates.
1117 bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
1118 IncomingInfo.hasSameVLMAX(PrevInfo);
1119 if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
1120 Info.setAVL(IncomingInfo);
1121
1122 Info.setVTYPE(
1123 ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
1124 .getVLMUL(),
1125 ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
1126 // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
1127 // if needed.
1128 (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
1129 IncomingInfo.getTailAgnostic(),
1130 (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
1131 IncomingInfo.getMaskAgnostic());
1132
1133 // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
1134 // the AVL.
1135 if (Info.hasSEWLMULRatioOnly()) {
1136 VSETVLIInfo RatiolessInfo = IncomingInfo;
1137 RatiolessInfo.setAVL(Info);
1138 Info = RatiolessInfo;
1139 }
1140}
1141
1142// Given a state with which we evaluated MI (see transferBefore above for why
1143 // this might be different from the state MI requested), modify the state to
1144// reflect the changes MI might make.
1145void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
1146 const MachineInstr &MI) const {
1147 if (isVectorConfigInstr(MI)) {
1148 Info = getInfoForVSETVLI(MI);
1149 return;
1150 }
1151
1152 if (RISCV::isFaultFirstLoad(MI)) {
1153 // Update AVL to the vl-output of the fault first load.
1154 Info.setAVLReg(MI.getOperand(1).getReg());
1155 return;
1156 }
1157
1158 // If this is something that updates VL/VTYPE that we don't know about, set
1159 // the state to unknown.
1160 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1161 MI.modifiesRegister(RISCV::VTYPE))
1162 Info = VSETVLIInfo::getUnknown();
1163}
1164
1165bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
1166 VSETVLIInfo &Info) const {
1167 bool HadVectorOp = false;
1168
1169 Info = BlockInfo[MBB.getNumber()].Pred;
1170 for (const MachineInstr &MI : MBB) {
1171 transferBefore(Info, MI);
1172
1173 if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1174 HadVectorOp = true;
1175
1176 transferAfter(Info, MI);
1177 }
1178
1179 return HadVectorOp;
1180}
1181
1182void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1183
1184 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1185
1186 BBInfo.InQueue = false;
1187
1188 // Start with the previous entry so that we keep the most conservative state
1189 // we have ever found.
1190 VSETVLIInfo InInfo = BBInfo.Pred;
1191 if (MBB.pred_empty()) {
1192 // There are no predecessors, so use the default starting status.
1193 InInfo.setUnknown();
1194 } else {
1195 for (MachineBasicBlock *P : MBB.predecessors())
1196 InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1197 }
1198
1199 // If we don't have any valid predecessor value, wait until we do.
1200 if (!InInfo.isValid())
1201 return;
1202
1203 // If no change, no need to rerun block
1204 if (InInfo == BBInfo.Pred)
1205 return;
1206
1207 BBInfo.Pred = InInfo;
1208 LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1209 << " changed to " << BBInfo.Pred << "\n");
1210
1211 // Note: It's tempting to cache the state changes here, but due to the
1212 // compatibility checks performed, a block's output state can change based on
1213 // the input state. To cache, we'd have to add logic for finding
1214 // never-compatible state changes.
1215 VSETVLIInfo TmpStatus;
1216 computeVLVTYPEChanges(MBB, TmpStatus);
1217
1218 // If the new exit value matches the old exit value, we don't need to revisit
1219 // any blocks.
1220 if (BBInfo.Exit == TmpStatus)
1221 return;
1222
1223 BBInfo.Exit = TmpStatus;
1224 LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1225 << " changed to " << BBInfo.Exit << "\n");
1226
1227 // Add the successors to the work list so we can propagate the changed exit
1228 // status.
1229 for (MachineBasicBlock *S : MBB.successors())
1230 if (!BlockInfo[S->getNumber()].InQueue) {
1231 BlockInfo[S->getNumber()].InQueue = true;
1232 WorkList.push(S);
1233 }
1234}
1235
1236// If we weren't able to prove a vsetvli was directly unneeded, it might still
1237// be unneeded if the AVL is a phi node where all incoming values are VL
1238// outputs from the last VSETVLI in their respective basic blocks.
1239bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1240 const MachineBasicBlock &MBB) const {
1241 if (DisableInsertVSETVLPHIOpt)
1242 return true;
1243
1244 if (!Require.hasAVLReg())
1245 return true;
1246
1247 Register AVLReg = Require.getAVLReg();
1248 if (!AVLReg.isVirtual())
1249 return true;
1250
1251 // We need the AVL to be produced by a PHI node in this basic block.
1252 MachineInstr *PHI = MRI->getVRegDef(AVLReg);
1253 if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
1254 return true;
1255
1256 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
1257 PHIOp += 2) {
1258 Register InReg = PHI->getOperand(PHIOp).getReg();
1259 MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
1260 const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
1261 // If the exit from the predecessor has the VTYPE we are looking for
1262 // we might be able to avoid a VSETVLI.
1263 if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
1264 return true;
1265
1266 // We need the PHI input to be the output of a VSET(I)VLI.
1267 MachineInstr *DefMI = MRI->getVRegDef(InReg);
1268 if (!DefMI || !isVectorConfigInstr(*DefMI))
1269 return true;
1270
1271 // We found a VSET(I)VLI; make sure it matches the output of the
1272 // predecessor block.
1273 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1274 if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
1275 !DefInfo.hasSameVTYPE(PBBInfo.Exit))
1276 return true;
1277 }
1278
1279 // If all the incoming values to the PHI checked out, we don't need
1280 // to insert a VSETVLI.
1281 return false;
1282}
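// Editorial sketch of the shape being matched, in rough MIR notation:
//
//   bb.1: %a = PseudoVSETVLI %avl1, vtype  ; last config in bb.1
//   bb.2: %b = PseudoVSETVLI %avl2, vtype  ; last config in bb.2
//   bb.3: %phi = PHI [ %a, %bb.1 ], [ %b, %bb.2 ]
//         ... vector op using %phi as AVL ...  ; no new vsetvli needed
//
// Each incoming value must be the VL output of the vsetvli that also defines
// that predecessor's exit VL/VTYPE state.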
1283
1284void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1285 VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1286 // Track whether the prefix of the block we've scanned is transparent
1287 // (meaning has not yet changed the abstract state).
1288 bool PrefixTransparent = true;
1289 for (MachineInstr &MI : MBB) {
1290 const VSETVLIInfo PrevInfo = CurInfo;
1291 transferBefore(CurInfo, MI);
1292
1293 // If this is an explicit VSETVLI or VSETIVLI, update our state.
1294 if (isVectorConfigInstr(MI)) {
1295 // Conservatively, mark the VL and VTYPE as live.
1296 assert(MI.getOperand(3).getReg() == RISCV::VL &&
1297 MI.getOperand(4).getReg() == RISCV::VTYPE &&
1298 "Unexpected operands where VL and VTYPE should be");
1299 MI.getOperand(3).setIsDead(false);
1300 MI.getOperand(4).setIsDead(false);
1301 PrefixTransparent = false;
1302 }
1303
1304 uint64_t TSFlags = MI.getDesc().TSFlags;
1305 if (RISCVII::hasSEWOp(TSFlags)) {
1306 if (PrevInfo != CurInfo) {
1307 // If this is the first implicit state change, and the state change
1308 // requested can be proven to produce the same register contents, we
1309 // can skip emitting the actual state change and continue as if we
1310 // had since we know the GPR result of the implicit state change
1311 // wouldn't be used and VL/VTYPE registers are correct. Note that
1312 // we *do* need to model the state as if it changed as while the
1313 // register contents are unchanged, the abstract model can change.
1314 if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1315 insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
1316 PrefixTransparent = false;
1317 }
1318
1319 if (RISCVII::hasVLOp(TSFlags)) {
1320 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1321 if (VLOp.isReg()) {
1322 Register Reg = VLOp.getReg();
1323 MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
1324
1325 // Erase the AVL operand from the instruction.
1326 VLOp.setReg(RISCV::NoRegister);
1327 VLOp.setIsKill(false);
1328
1329 // If the AVL was an immediate > 31, then it would have been emitted
1330 // as an ADDI. However, the ADDI might not have been used in the
1331 // vsetvli, or a vsetvli might not have been emitted, so it may be
1332 // dead now.
1333 if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
1334 MRI->use_nodbg_empty(Reg))
1335 VLOpDef->eraseFromParent();
1336 }
1337 MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1338 /*isImp*/ true));
1339 }
1340 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1341 /*isImp*/ true));
1342 }
1343
1344 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1345 MI.modifiesRegister(RISCV::VTYPE))
1346 PrefixTransparent = false;
1347
1348 transferAfter(CurInfo, MI);
1349 }
1350
1351 // If we reach the end of the block and our current info doesn't match the
1352 // expected info, insert a vsetvli to correct.
1353 if (!UseStrictAsserts) {
1354 const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
1355 if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
1356 CurInfo != ExitInfo) {
1357 // Note there's an implicit assumption here that terminators never use
1358 // or modify VL or VTYPE. Also, fallthrough will return end().
1359 auto InsertPt = MBB.getFirstInstrTerminator();
1360 insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
1361 CurInfo);
1362 CurInfo = ExitInfo;
1363 }
1364 }
1365
1366 if (UseStrictAsserts && CurInfo.isValid()) {
1367 const auto &Info = BlockInfo[MBB.getNumber()];
1368 if (CurInfo != Info.Exit) {
1369 LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1370 LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
1371 LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
1372 LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
1373 }
1374 assert(CurInfo == Info.Exit &&
1375 "InsertVSETVLI dataflow invariant violated");
1376 }
1377}
1378
1379/// Perform simple partial redundancy elimination of the VSETVLI instructions
1380/// we're about to insert by looking for cases where we can PRE from the
1381/// beginning of one block to the end of one of its predecessors. Specifically,
1382/// this is geared to catch the common case of a fixed length vsetvl in a single
1383/// block loop when it could execute once in the preheader instead.
1384void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1385 if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1386 return;
1387
1388 MachineBasicBlock *UnavailablePred = nullptr;
1389 VSETVLIInfo AvailableInfo;
1390 for (MachineBasicBlock *P : MBB.predecessors()) {
1391 const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1392 if (PredInfo.isUnknown()) {
1393 if (UnavailablePred)
1394 return;
1395 UnavailablePred = P;
1396 } else if (!AvailableInfo.isValid()) {
1397 AvailableInfo = PredInfo;
1398 } else if (AvailableInfo != PredInfo) {
1399 return;
1400 }
1401 }
1402
1403 // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1404 // phase 3.
1405 if (!UnavailablePred || !AvailableInfo.isValid())
1406 return;
1407
1408 // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
1409 // the unavailable pred.
1410 if (AvailableInfo.hasSEWLMULRatioOnly())
1411 return;
1412
1413 // Critical edge - TODO: consider splitting?
1414 if (UnavailablePred->succ_size() != 1)
1415 return;
1416
1417 // If the AVL value is a register (other than our VLMAX sentinel),
1418 // we need to prove the value is available at the point we're going
1419 // to insert the vsetvli at.
1420 if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
1421 MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
1422 if (!AVLDefMI)
1423 return;
1424 // This is an inline dominance check which covers the case of
1425 // UnavailablePred being the preheader of a loop.
1426 if (AVLDefMI->getParent() != UnavailablePred)
1427 return;
1428 for (auto &TermMI : UnavailablePred->terminators())
1429 if (&TermMI == AVLDefMI)
1430 return;
1431 }
1432
1433 // Model the effect of changing the input state of the block MBB to
1434 // AvailableInfo. We're looking for two issues here; one legality,
1435 // one profitability.
1436 // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1437 // may hit the end of the block with a different end state. We can
1438 // not make this change without reflowing later blocks as well.
1439 // 2) If we don't actually remove a transition, inserting a vsetvli
1440 // into the predecessor block would be correct, but unprofitable.
1441 VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
1442 VSETVLIInfo CurInfo = AvailableInfo;
1443 int TransitionsRemoved = 0;
1444 for (const MachineInstr &MI : MBB) {
1445 const VSETVLIInfo LastInfo = CurInfo;
1446 const VSETVLIInfo LastOldInfo = OldInfo;
1447 transferBefore(CurInfo, MI);
1448 transferBefore(OldInfo, MI);
1449 if (CurInfo == LastInfo)
1450 TransitionsRemoved++;
1451 if (LastOldInfo == OldInfo)
1452 TransitionsRemoved--;
1453 transferAfter(CurInfo, MI);
1454 transferAfter(OldInfo, MI);
1455 if (CurInfo == OldInfo)
1456 // Convergence. All transitions after this must match by construction.
1457 break;
1458 }
1459 if (CurInfo != OldInfo || TransitionsRemoved <= 0)
1460 // Issues 1 and 2 above
1461 return;
1462
1463 // Finally, update both data flow state and insert the actual vsetvli.
1464 // Doing both keeps the code in sync with the dataflow results, which
1465 // is critical for correctness of phase 3.
1466 auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
1467 LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1468 << UnavailablePred->getName() << " with state "
1469 << AvailableInfo << "\n");
1470 BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1471 BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1472
1473 // Note there's an implicit assumption here that terminators never use
1474 // or modify VL or VTYPE. Also, fallthrough will return end().
1475 auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1476 insertVSETVLI(*UnavailablePred, InsertPt,
1477 UnavailablePred->findDebugLoc(InsertPt),
1478 AvailableInfo, OldExit);
1479}
1480
1481static void doUnion(DemandedFields &A, DemandedFields B) {
1482 A.VLAny |= B.VLAny;
1483 A.VLZeroness |= B.VLZeroness;
1484 A.SEW = std::max(A.SEW, B.SEW);
1485 A.LMUL |= B.LMUL;
1486 A.SEWLMULRatio |= B.SEWLMULRatio;
1487 A.TailPolicy |= B.TailPolicy;
1488 A.MaskPolicy |= B.MaskPolicy;
1489}
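// Editorial note: std::max is a valid union for the SEW field because the
// enumerators are ordered by increasing strictness, from SEWNone = 0 up to
// SEWEqual = 3.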
1490
1491// Return true if we can mutate PrevMI to match MI without changing any of the
1492// fields which would be observed.
1493static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1494 const MachineInstr &MI,
1495 const DemandedFields &Used,
1496 const MachineRegisterInfo &MRI) {
1497 // If the VL values aren't equal, return false if either a) the former is
1498 // demanded, or b) we can't rewrite the former to be the latter for
1499 // implementation reasons.
1500 if (!isVLPreservingConfig(MI)) {
1501 if (Used.VLAny)
1502 return false;
1503
1504 if (Used.VLZeroness) {
1505 if (isVLPreservingConfig(PrevMI))
1506 return false;
1507 if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1508 MRI))
1509 return false;
1510 }
1511
1512 auto &AVL = MI.getOperand(1);
1513 auto &PrevAVL = PrevMI.getOperand(1);
1514 assert(MRI.isSSA());
1515
1516 // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
1517 // For now just check that PrevMI uses the same virtual register.
1518 if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
1519 (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
1520 return false;
1521 }
1522
1523 assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
1524 auto PriorVType = PrevMI.getOperand(2).getImm();
1525 auto VType = MI.getOperand(2).getImm();
1526 return areCompatibleVTYPEs(PriorVType, VType, Used);
1527}
1528
1529void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1530 MachineInstr *NextMI = nullptr;
1531 // We can have arbitrary code in successors, so VL and VTYPE
1532 // must be considered demanded.
1533 DemandedFields Used;
1534 Used.demandVL();
1535 Used.demandVTYPE();
1536 SmallVector<MachineInstr*> ToDelete;
1537 for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1538
1539 if (!isVectorConfigInstr(MI)) {
1540 doUnion(Used, getDemanded(MI, MRI, ST));
1541 if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
1542 MI.modifiesRegister(RISCV::VTYPE))
1543 NextMI = nullptr;
1544 continue;
1545 }
1546
1547 Register RegDef = MI.getOperand(0).getReg();
1548 assert(RegDef == RISCV::X0 || RegDef.isVirtual());
1549 if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
1550 Used.demandVL();
1551
1552 if (NextMI) {
1553 if (!Used.usedVL() && !Used.usedVTYPE()) {
1554 ToDelete.push_back(&MI);
1555 // Leave NextMI unchanged
1556 continue;
1557 }
1558
1559 if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
1560 if (!isVLPreservingConfig(*NextMI)) {
1561 MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1562 MI.getOperand(0).setIsDead(false);
1563 Register OldVLReg;
1564 if (MI.getOperand(1).isReg())
1565 OldVLReg = MI.getOperand(1).getReg();
1566 if (NextMI->getOperand(1).isImm())
1567 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1568 else
1569 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1570 if (OldVLReg) {
1571 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1572 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1573 MRI->use_nodbg_empty(OldVLReg))
1574 VLOpDef->eraseFromParent();
1575 }
1576 MI.setDesc(NextMI->getDesc());
1577 }
1578 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1579 ToDelete.push_back(NextMI);
1580 // fallthrough
1581 }
1582 }
1583 NextMI = &MI;
1584 Used = getDemanded(MI, MRI, ST);
1585 }
1586
1587 NumRemovedVSETVL += ToDelete.size();
1588 for (auto *MI : ToDelete)
1589 MI->eraseFromParent();
1590}
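// Editorial example of the postpass: in
//
//   vsetvli x0, a0, e8, m1, ta, ma    ; nothing reads VL/VTYPE before the
//   vsetvli x0, a0, e32, m1, ta, ma   ; next config overwrites them
//   vle32.v v8, (a1)
//
// the first config demands nothing that survives to a reader, so it lands on
// ToDelete and a single vsetvli remains.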
1591
1592void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1593 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1594 MachineInstr &MI = *I++;
1595 if (RISCV::isFaultFirstLoad(MI)) {
1596 Register VLOutput = MI.getOperand(1).getReg();
1597 if (!MRI->use_nodbg_empty(VLOutput))
1598 BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
1599 VLOutput);
1600 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1601 MI.getOperand(1).setReg(RISCV::X0);
1602 }
1603 }
1604}
1605
1606bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1607 // Skip if the vector extension is not enabled.
1608 ST = &MF.getSubtarget<RISCVSubtarget>();
1609 if (!ST->hasVInstructions())
1610 return false;
1611
1612 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1613
1614 TII = ST->getInstrInfo();
1615 MRI = &MF.getRegInfo();
1616
1617 assert(BlockInfo.empty() && "Expect empty block infos");
1618 BlockInfo.resize(MF.getNumBlockIDs());
1619
1620 bool HaveVectorOp = false;
1621
1622 // Phase 1 - determine how VL/VTYPE are affected by each block.
1623 for (const MachineBasicBlock &MBB : MF) {
1624 VSETVLIInfo TmpStatus;
1625 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1626 // Initial exit state is whatever change we found in the block.
1627 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1628 BBInfo.Exit = TmpStatus;
1629 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1630 << " is " << BBInfo.Exit << "\n");
1631
1632 }
1633
1634 // If we didn't find any instructions that need VSETVLI, we're done.
1635 if (!HaveVectorOp) {
1636 BlockInfo.clear();
1637 return false;
1638 }
1639
1640 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1641 // blocks to the list here, but will also add any that need to be revisited
1642 // during Phase 2 processing.
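// Each visit recomputes a block's incoming state as the intersection of
// its predecessors' exit states; when a block's exit state changes, its
// successors are re-queued until a fixed point is reached.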
1643 for (const MachineBasicBlock &MBB : MF) {
1644 WorkList.push(&MBB);
1645 BlockInfo[MBB.getNumber()].InQueue = true;
1646 }
1647 while (!WorkList.empty()) {
1648 const MachineBasicBlock &MBB = *WorkList.front();
1649 WorkList.pop();
1650 computeIncomingVLVTYPE(MBB);
1651 }
1652
1653 // Perform partial redundancy elimination of vsetvli transitions.
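// (That is, hoist a vsetvli into a predecessor when doing so makes it
// redundant along the paths through this block.)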
1654 for (MachineBasicBlock &MBB : MF)
1655 doPRE(MBB);
1656
1657 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1658 // Phase 2 information to avoid adding vsetvlis before the first vector
1659 // instruction in the block if the VL/VTYPE is satisfied by its
1660 // predecessors.
1661 for (MachineBasicBlock &MBB : MF)
1662 emitVSETVLIs(MBB);
1663
1664 // Now that all vsetvlis are explicit, go through and do block-local
1665 // DSE and peephole transforms based on demanded fields. Note that this
1666 // *must* be done outside the main dataflow so long as we allow any
1667 // cross-block analysis within the dataflow. We can't have both
1668 // demanded-fields-based mutation and non-local analysis in the
1669 // dataflow at the same time without introducing inconsistencies.
1670 for (MachineBasicBlock &MBB : MF)
1671 doLocalPostpass(MBB);
1672
1673 // Insert a PseudoReadVL after each VLEFF/VLSEGFF and replace all uses
1674 // of the vl output of the VLEFF/VLSEGFF with it.
1675 for (MachineBasicBlock &MBB : MF)
1676 insertReadVL(MBB);
1677
1678 BlockInfo.clear();
1679 return HaveVectorOp;
1680}
1681
1682 /// Returns an instance of the Insert VSETVLI pass.
1683 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1684 return new RISCVInsertVSETVLI();
1685}