1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode-only targets, the cycle[h]/time[h] CSRs may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
717 ISD::VP_REDUCE_FMAXIMUM};
718
719 static const unsigned IntegerVecReduceOps[] = {
723
724 static const unsigned FloatingPointVecReduceOps[] = {
727
728 if (!Subtarget.is64Bit()) {
729 // We must custom-lower certain vXi64 operations on RV32 due to the vector
730 // element type being illegal.
732 MVT::i64, Custom);
733
734 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
735
736 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
737 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
738 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
739 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
740 MVT::i64, Custom);
741 }
742
743 for (MVT VT : BoolVecVTs) {
744 if (!isTypeLegal(VT))
745 continue;
746
748
749 // Mask VTs are custom-expanded into a series of standard nodes
753 VT, Custom);
754
756 Custom);
757
760 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
761 Expand);
762
763 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
764 Custom);
765
766 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
767
770 Custom);
771
773 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
774 Custom);
775
776 // RVV has native int->float & float->int conversions where the
777 // element type sizes are within one power-of-two of each other. Any
778 // wider distances between type sizes have to be lowered as sequences
779 // which progressively narrow the gap in stages.
784 VT, Custom);
786 Custom);
787
788 // Expand all extending loads to types larger than this, and truncating
789 // stores from types larger than this.
791 setTruncStoreAction(VT, OtherVT, Expand);
793 OtherVT, Expand);
794 }
795
796 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
797 ISD::VP_TRUNCATE, ISD::VP_SETCC},
798 VT, Custom);
799
802
804
805 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
806 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
807
810 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
811 }
812
813 for (MVT VT : IntVecVTs) {
814 if (!isTypeLegal(VT))
815 continue;
816
819
820 // Vectors implement MULHS/MULHU.
822
823 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
824 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
826
828 Legal);
829
831
832 // Custom-lower extensions and truncations from/to mask types.
834 VT, Custom);
835
836 // RVV has native int->float & float->int conversions where the
837 // element type sizes are within one power-of-two of each other. Any
838 // wider distances between type sizes have to be lowered as sequences
839 // which progressively narrow the gap in stages.
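      // For example (illustrative), an i8 -> f64 conversion spans more than
      // one power-of-two of element width, so it is emitted as a staged
      // sequence (e.g. widening the integer to i32 first and only then
      // converting to f64) rather than a single vfcvt.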
844 VT, Custom);
846 Custom);
849 VT, Legal);
850
851 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
852 // nodes which truncate by one power of two at a time.
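      // For example (illustrative), truncating nxv2i64 to nxv2i8 becomes a
      // chain of three such nodes, i64 -> i32 -> i16 -> i8, each typically
      // realized with a vnsrl.wi by 0.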
854
855 // Custom-lower insert/extract operations to simplify patterns.
857 Custom);
858
859 // Custom-lower reduction operations to set up the corresponding custom
860 // nodes' operands.
861 setOperationAction(IntegerVecReduceOps, VT, Custom);
862
863 setOperationAction(IntegerVPOps, VT, Custom);
864
866
868 VT, Custom);
869
871 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
872 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
873 VT, Custom);
874
877 VT, Custom);
878
881
883
885 setTruncStoreAction(VT, OtherVT, Expand);
887 OtherVT, Expand);
888 }
889
892
893 // Splice
895
896 if (Subtarget.hasStdExtZvkb()) {
898 setOperationAction(ISD::VP_BSWAP, VT, Custom);
899 } else {
900 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
902 }
903
904 if (Subtarget.hasStdExtZvbb()) {
906 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
907 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
908 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
909 VT, Custom);
910 } else {
911 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Expand);
916
917 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
918 // in the range of f32.
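      // Illustrative sketch of the trick: the element is converted to f32 and
      // the biased exponent of the result encodes floor(log2(x)), from which
      // the leading/trailing zero count is recovered with integer arithmetic.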
919 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
920 if (isTypeLegal(FloatVT)) {
922 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
923 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
924 VT, Custom);
925 }
926 }
927 }
928
929 // Expand various CCs to best match the RVV ISA, which natively supports UNE
930 // but no other unordered comparisons, and supports all ordered comparisons
931 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
932 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
933 // and we pattern-match those back to the "original", swapping operands once
934 // more. This way we catch both operations and both "vf" and "fv" forms with
935 // fewer patterns.
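  // Illustrative example: SETOGT x, y is expanded to SETOLT y, x; isel then
  // either selects vmflt.vv directly or, for the scalar-operand form, swaps
  // back once more to use vmfgt.vf.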
936 static const ISD::CondCode VFPCCToExpand[] = {
940 };
941
942 // TODO: support more ops.
943 static const unsigned ZvfhminPromoteOps[] = {
951
952 // TODO: support more vp ops.
953 static const unsigned ZvfhminPromoteVPOps[] = {
954 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
955 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
956 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
957 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
958 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
959 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
960 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
961 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
962 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
963
964 // Sets common operation actions on RVV floating-point vector types.
965 const auto SetCommonVFPActions = [&](MVT VT) {
967 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
968 // sizes are within one power-of-two of each other. Therefore conversions
969 // between vXf16 and vXf64 must be lowered as sequences which convert via
970 // vXf32.
973 // Custom-lower insert/extract operations to simplify patterns.
975 Custom);
976 // Expand various condition codes (explained above).
977 setCondCodeAction(VFPCCToExpand, VT, Expand);
978
981
985 VT, Custom);
986
987 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
988
989 // Expand FP operations that need libcalls.
1001
1003
1005
1007 VT, Custom);
1008
1010 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1011 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1012 VT, Custom);
1013
1016
1019 VT, Custom);
1020
1023
1025
1026 setOperationAction(FloatingPointVPOps, VT, Custom);
1027
1029 Custom);
1032 VT, Legal);
1037 VT, Custom);
1038 };
1039
1040 // Sets common extload/truncstore actions on RVV floating-point vector
1041 // types.
1042 const auto SetCommonVFPExtLoadTruncStoreActions =
1043 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1044 for (auto SmallVT : SmallerVTs) {
1045 setTruncStoreAction(VT, SmallVT, Expand);
1046 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1047 }
1048 };
1049
1050 if (Subtarget.hasVInstructionsF16()) {
1051 for (MVT VT : F16VecVTs) {
1052 if (!isTypeLegal(VT))
1053 continue;
1054 SetCommonVFPActions(VT);
1055 }
1056 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1057 for (MVT VT : F16VecVTs) {
1058 if (!isTypeLegal(VT))
1059 continue;
1062 Custom);
1063 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1064 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1065 Custom);
1068 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1069 VT, Custom);
1072 VT, Custom);
1073 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1075 // load/store
1077
1078 // Custom split nxv32f16 since nxv32f32 is not legal.
1079 if (VT == MVT::nxv32f16) {
1080 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1081 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1082 continue;
1083 }
1084 // Add more promote ops.
1085 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1086 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1087 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1088 }
1089 }
1090
1091 // TODO: Could we merge some code with zvfhmin?
1092 if (Subtarget.hasVInstructionsBF16()) {
1093 for (MVT VT : BF16VecVTs) {
1094 if (!isTypeLegal(VT))
1095 continue;
1097 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1099 Custom);
1102 VT, Custom);
1104 // TODO: Promote to fp32.
1105 }
1106 }
1107
1108 if (Subtarget.hasVInstructionsF32()) {
1109 for (MVT VT : F32VecVTs) {
1110 if (!isTypeLegal(VT))
1111 continue;
1112 SetCommonVFPActions(VT);
1113 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1114 }
1115 }
1116
1117 if (Subtarget.hasVInstructionsF64()) {
1118 for (MVT VT : F64VecVTs) {
1119 if (!isTypeLegal(VT))
1120 continue;
1121 SetCommonVFPActions(VT);
1122 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1123 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1124 }
1125 }
1126
1127 if (Subtarget.useRVVForFixedLengthVectors()) {
1129 if (!useRVVForFixedLengthVectorVT(VT))
1130 continue;
1131
1132 // By default everything must be expanded.
1133 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1136 setTruncStoreAction(VT, OtherVT, Expand);
1138 OtherVT, Expand);
1139 }
1140
1141 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1142 // expansion to a build_vector of 0s.
1144
1145 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1147 Custom);
1148
1150 Custom);
1151
1153 VT, Custom);
1154
1156
1158
1160
1162
1164
1166
1169 Custom);
1170
1172 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1173 Custom);
1174
1176 {
1185 },
1186 VT, Custom);
1188 Custom);
1189
1191
1192 // Operations below are different between masks and other vectors.
1193 if (VT.getVectorElementType() == MVT::i1) {
1194 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1195 ISD::OR, ISD::XOR},
1196 VT, Custom);
1197
1198 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1199 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1200 VT, Custom);
1201
1202 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1203 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1204 continue;
1205 }
1206
1207 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1208 // it before type legalization for i64 vectors on RV32. It will then be
1209 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1210 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1211 // improvements first.
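      // Illustrative example: on RV32, a splat of an i64 value X becomes
      // SPLAT_VECTOR_PARTS taking Lo(X) and Hi(X) as two i32 operands, which
      // the custom lowering reassembles into a 64-bit element splat.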
1212 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1215 }
1216
1219
1220 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1221 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1222 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1223 ISD::VP_SCATTER},
1224 VT, Custom);
1225
1229 VT, Custom);
1230
1233
1235
1236 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1237 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1239
1242 VT, Custom);
1243
1246
1249
1250 // Custom-lower reduction operations to set up the corresponding custom
1251 // nodes' operands.
1255 VT, Custom);
1256
1257 setOperationAction(IntegerVPOps, VT, Custom);
1258
1259 if (Subtarget.hasStdExtZvkb())
1261
1262 if (Subtarget.hasStdExtZvbb()) {
1265 VT, Custom);
1266 } else {
1267 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1268 // in the range of f32.
1269 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1270 if (isTypeLegal(FloatVT))
1273 Custom);
1274 }
1275 }
1276
1278 // There are no extending loads or truncating stores.
1279 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1280 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1281 setTruncStoreAction(VT, InnerVT, Expand);
1282 }
1283
1284 if (!useRVVForFixedLengthVectorVT(VT))
1285 continue;
1286
1287 // By default everything must be expanded.
1288 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1290
1291 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1292 // expansion to a build_vector of 0s.
1294
1295 if (VT.getVectorElementType() == MVT::f16 &&
1296 !Subtarget.hasVInstructionsF16()) {
1299 Custom);
1300 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1302 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1303 Custom);
1305 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1306 VT, Custom);
1309 VT, Custom);
1312 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313 // Don't promote f16 vector operations to f32 if f32 vector type is
1314 // not legal.
1315 // TODO: could split the f16 vector into two vectors and do promotion.
1316 if (!isTypeLegal(F32VecVT))
1317 continue;
1318 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1319 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1320 continue;
1321 }
1322
1323 if (VT.getVectorElementType() == MVT::bf16) {
1325 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1327 Custom);
1330 VT, Custom);
1332 // TODO: Promote to fp32.
1333 continue;
1334 }
1335
1336 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1338 Custom);
1339
1343 VT, Custom);
1344
1347 VT, Custom);
1348
1349 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1350 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1352 ISD::VP_SCATTER},
1353 VT, Custom);
1354
1359 VT, Custom);
1360
1362
1365 VT, Custom);
1366
1367 setCondCodeAction(VFPCCToExpand, VT, Expand);
1368
1372
1374
1375 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1376
1377 setOperationAction(FloatingPointVPOps, VT, Custom);
1378
1380 Custom);
1387 VT, Custom);
1388 }
1389
1390 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1391 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1392 Custom);
1393 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1395 if (Subtarget.hasStdExtFOrZfinx())
1397 if (Subtarget.hasStdExtDOrZdinx())
1399 }
1400 }
1401
1402 if (Subtarget.hasStdExtA()) {
1404 if (RV64LegalI32 && Subtarget.is64Bit())
1406 }
1407
1408 if (Subtarget.hasForcedAtomics()) {
1409 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1415 XLenVT, LibCall);
1416 }
1417
1418 if (Subtarget.hasVendorXTHeadMemIdx()) {
1419 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1420 setIndexedLoadAction(im, MVT::i8, Legal);
1421 setIndexedStoreAction(im, MVT::i8, Legal);
1422 setIndexedLoadAction(im, MVT::i16, Legal);
1423 setIndexedStoreAction(im, MVT::i16, Legal);
1424 setIndexedLoadAction(im, MVT::i32, Legal);
1425 setIndexedStoreAction(im, MVT::i32, Legal);
1426
1427 if (Subtarget.is64Bit()) {
1428 setIndexedLoadAction(im, MVT::i64, Legal);
1429 setIndexedStoreAction(im, MVT::i64, Legal);
1430 }
1431 }
1432 }
1433
1434 // Function alignments.
1435 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1436 setMinFunctionAlignment(FunctionAlignment);
1437 // Set preferred alignments.
1440
1444 if (Subtarget.is64Bit())
1446
1447 if (Subtarget.hasStdExtFOrZfinx())
1449
1450 if (Subtarget.hasStdExtZbb())
1452
1453 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1455
1456 if (Subtarget.hasStdExtZbkb())
1458 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1460 if (Subtarget.hasStdExtFOrZfinx())
1463 if (Subtarget.hasVInstructions())
1465 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1468 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1471 if (Subtarget.hasVendorXTHeadMemPair())
1473 if (Subtarget.useRVVForFixedLengthVectors())
1475
1476 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1477 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1478
1479 // Disable strict node mutation.
1480 IsStrictFPEnabled = true;
1481}
1482
1484 LLVMContext &Context,
1485 EVT VT) const {
1486 if (!VT.isVector())
1487 return getPointerTy(DL);
1488 if (Subtarget.hasVInstructions() &&
1489 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1490 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1492}
1493
1494MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1495 return Subtarget.getXLenVT();
1496}
1497
1498// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1499bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1500 unsigned VF,
1501 bool IsScalable) const {
1502 if (!Subtarget.hasVInstructions())
1503 return true;
1504
1505 if (!IsScalable)
1506 return true;
1507
1508 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1509 return true;
1510
1511 // Don't allow VF=1 if those types aren't legal.
1512 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1513 return true;
1514
1515 // VLEN=32 support is incomplete.
1516 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1517 return true;
1518
1519 // The maximum VF is for the smallest element width with LMUL=8.
1520 // VF must be a power of 2.
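  // For example, with RVVBitsPerBlock = 64 this evaluates to (64 / 8) * 8 = 64,
  // i.e. an LMUL=8 register group of i8 elements.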
1521 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1522 return VF > MaxVF || !isPowerOf2_32(VF);
1523}
1524
1526 return !Subtarget.hasVInstructions() ||
1527 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1528}
1529
1531 const CallInst &I,
1532 MachineFunction &MF,
1533 unsigned Intrinsic) const {
1534 auto &DL = I.getModule()->getDataLayout();
1535
1536 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1537 bool IsUnitStrided, bool UsePtrVal = false) {
1539 // We can't use ptrVal if the intrinsic can access memory before the
1540 // pointer. This means we can't use it for strided or indexed intrinsics.
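  // For example (illustrative), a strided load with a negative stride reads
  // addresses below the base pointer, so reporting ptrVal there would
  // understate the range of memory the intrinsic may touch.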
1541 if (UsePtrVal)
1542 Info.ptrVal = I.getArgOperand(PtrOp);
1543 else
1544 Info.fallbackAddressSpace =
1545 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1546 Type *MemTy;
1547 if (IsStore) {
1548 // Store value is the first operand.
1549 MemTy = I.getArgOperand(0)->getType();
1550 } else {
1551 // Use the return type. If it's a segment load, the return type is a struct.
1552 MemTy = I.getType();
1553 if (MemTy->isStructTy())
1554 MemTy = MemTy->getStructElementType(0);
1555 }
1556 if (!IsUnitStrided)
1557 MemTy = MemTy->getScalarType();
1558
1559 Info.memVT = getValueType(DL, MemTy);
1560 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1562 Info.flags |=
1564 return true;
1565 };
1566
1567 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1569
1571 switch (Intrinsic) {
1572 default:
1573 return false;
1574 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1575 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1576 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1577 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1578 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1579 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1580 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1581 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1582 case Intrinsic::riscv_masked_cmpxchg_i32:
1584 Info.memVT = MVT::i32;
1585 Info.ptrVal = I.getArgOperand(0);
1586 Info.offset = 0;
1587 Info.align = Align(4);
1590 return true;
1591 case Intrinsic::riscv_masked_strided_load:
1592 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1593 /*IsUnitStrided*/ false);
1594 case Intrinsic::riscv_masked_strided_store:
1595 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_seg2_load:
1598 case Intrinsic::riscv_seg3_load:
1599 case Intrinsic::riscv_seg4_load:
1600 case Intrinsic::riscv_seg5_load:
1601 case Intrinsic::riscv_seg6_load:
1602 case Intrinsic::riscv_seg7_load:
1603 case Intrinsic::riscv_seg8_load:
1604 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1605 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1606 case Intrinsic::riscv_seg2_store:
1607 case Intrinsic::riscv_seg3_store:
1608 case Intrinsic::riscv_seg4_store:
1609 case Intrinsic::riscv_seg5_store:
1610 case Intrinsic::riscv_seg6_store:
1611 case Intrinsic::riscv_seg7_store:
1612 case Intrinsic::riscv_seg8_store:
1613 // Operands are (vec, ..., vec, ptr, vl)
1614 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1615 /*IsStore*/ true,
1616 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1617 case Intrinsic::riscv_vle:
1618 case Intrinsic::riscv_vle_mask:
1619 case Intrinsic::riscv_vleff:
1620 case Intrinsic::riscv_vleff_mask:
1621 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1622 /*IsStore*/ false,
1623 /*IsUnitStrided*/ true,
1624 /*UsePtrVal*/ true);
1625 case Intrinsic::riscv_vse:
1626 case Intrinsic::riscv_vse_mask:
1627 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1628 /*IsStore*/ true,
1629 /*IsUnitStrided*/ true,
1630 /*UsePtrVal*/ true);
1631 case Intrinsic::riscv_vlse:
1632 case Intrinsic::riscv_vlse_mask:
1633 case Intrinsic::riscv_vloxei:
1634 case Intrinsic::riscv_vloxei_mask:
1635 case Intrinsic::riscv_vluxei:
1636 case Intrinsic::riscv_vluxei_mask:
1637 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1638 /*IsStore*/ false,
1639 /*IsUnitStrided*/ false);
1640 case Intrinsic::riscv_vsse:
1641 case Intrinsic::riscv_vsse_mask:
1642 case Intrinsic::riscv_vsoxei:
1643 case Intrinsic::riscv_vsoxei_mask:
1644 case Intrinsic::riscv_vsuxei:
1645 case Intrinsic::riscv_vsuxei_mask:
1646 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1647 /*IsStore*/ true,
1648 /*IsUnitStrided*/ false);
1649 case Intrinsic::riscv_vlseg2:
1650 case Intrinsic::riscv_vlseg3:
1651 case Intrinsic::riscv_vlseg4:
1652 case Intrinsic::riscv_vlseg5:
1653 case Intrinsic::riscv_vlseg6:
1654 case Intrinsic::riscv_vlseg7:
1655 case Intrinsic::riscv_vlseg8:
1656 case Intrinsic::riscv_vlseg2ff:
1657 case Intrinsic::riscv_vlseg3ff:
1658 case Intrinsic::riscv_vlseg4ff:
1659 case Intrinsic::riscv_vlseg5ff:
1660 case Intrinsic::riscv_vlseg6ff:
1661 case Intrinsic::riscv_vlseg7ff:
1662 case Intrinsic::riscv_vlseg8ff:
1663 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1664 /*IsStore*/ false,
1665 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1666 case Intrinsic::riscv_vlseg2_mask:
1667 case Intrinsic::riscv_vlseg3_mask:
1668 case Intrinsic::riscv_vlseg4_mask:
1669 case Intrinsic::riscv_vlseg5_mask:
1670 case Intrinsic::riscv_vlseg6_mask:
1671 case Intrinsic::riscv_vlseg7_mask:
1672 case Intrinsic::riscv_vlseg8_mask:
1673 case Intrinsic::riscv_vlseg2ff_mask:
1674 case Intrinsic::riscv_vlseg3ff_mask:
1675 case Intrinsic::riscv_vlseg4ff_mask:
1676 case Intrinsic::riscv_vlseg5ff_mask:
1677 case Intrinsic::riscv_vlseg6ff_mask:
1678 case Intrinsic::riscv_vlseg7ff_mask:
1679 case Intrinsic::riscv_vlseg8ff_mask:
1680 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1681 /*IsStore*/ false,
1682 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1683 case Intrinsic::riscv_vlsseg2:
1684 case Intrinsic::riscv_vlsseg3:
1685 case Intrinsic::riscv_vlsseg4:
1686 case Intrinsic::riscv_vlsseg5:
1687 case Intrinsic::riscv_vlsseg6:
1688 case Intrinsic::riscv_vlsseg7:
1689 case Intrinsic::riscv_vlsseg8:
1690 case Intrinsic::riscv_vloxseg2:
1691 case Intrinsic::riscv_vloxseg3:
1692 case Intrinsic::riscv_vloxseg4:
1693 case Intrinsic::riscv_vloxseg5:
1694 case Intrinsic::riscv_vloxseg6:
1695 case Intrinsic::riscv_vloxseg7:
1696 case Intrinsic::riscv_vloxseg8:
1697 case Intrinsic::riscv_vluxseg2:
1698 case Intrinsic::riscv_vluxseg3:
1699 case Intrinsic::riscv_vluxseg4:
1700 case Intrinsic::riscv_vluxseg5:
1701 case Intrinsic::riscv_vluxseg6:
1702 case Intrinsic::riscv_vluxseg7:
1703 case Intrinsic::riscv_vluxseg8:
1704 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1705 /*IsStore*/ false,
1706 /*IsUnitStrided*/ false);
1707 case Intrinsic::riscv_vlsseg2_mask:
1708 case Intrinsic::riscv_vlsseg3_mask:
1709 case Intrinsic::riscv_vlsseg4_mask:
1710 case Intrinsic::riscv_vlsseg5_mask:
1711 case Intrinsic::riscv_vlsseg6_mask:
1712 case Intrinsic::riscv_vlsseg7_mask:
1713 case Intrinsic::riscv_vlsseg8_mask:
1714 case Intrinsic::riscv_vloxseg2_mask:
1715 case Intrinsic::riscv_vloxseg3_mask:
1716 case Intrinsic::riscv_vloxseg4_mask:
1717 case Intrinsic::riscv_vloxseg5_mask:
1718 case Intrinsic::riscv_vloxseg6_mask:
1719 case Intrinsic::riscv_vloxseg7_mask:
1720 case Intrinsic::riscv_vloxseg8_mask:
1721 case Intrinsic::riscv_vluxseg2_mask:
1722 case Intrinsic::riscv_vluxseg3_mask:
1723 case Intrinsic::riscv_vluxseg4_mask:
1724 case Intrinsic::riscv_vluxseg5_mask:
1725 case Intrinsic::riscv_vluxseg6_mask:
1726 case Intrinsic::riscv_vluxseg7_mask:
1727 case Intrinsic::riscv_vluxseg8_mask:
1728 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1729 /*IsStore*/ false,
1730 /*IsUnitStrided*/ false);
1731 case Intrinsic::riscv_vsseg2:
1732 case Intrinsic::riscv_vsseg3:
1733 case Intrinsic::riscv_vsseg4:
1734 case Intrinsic::riscv_vsseg5:
1735 case Intrinsic::riscv_vsseg6:
1736 case Intrinsic::riscv_vsseg7:
1737 case Intrinsic::riscv_vsseg8:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vsseg2_mask:
1742 case Intrinsic::riscv_vsseg3_mask:
1743 case Intrinsic::riscv_vsseg4_mask:
1744 case Intrinsic::riscv_vsseg5_mask:
1745 case Intrinsic::riscv_vsseg6_mask:
1746 case Intrinsic::riscv_vsseg7_mask:
1747 case Intrinsic::riscv_vsseg8_mask:
1748 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1749 /*IsStore*/ true,
1750 /*IsUnitStrided*/ false);
1751 case Intrinsic::riscv_vssseg2:
1752 case Intrinsic::riscv_vssseg3:
1753 case Intrinsic::riscv_vssseg4:
1754 case Intrinsic::riscv_vssseg5:
1755 case Intrinsic::riscv_vssseg6:
1756 case Intrinsic::riscv_vssseg7:
1757 case Intrinsic::riscv_vssseg8:
1758 case Intrinsic::riscv_vsoxseg2:
1759 case Intrinsic::riscv_vsoxseg3:
1760 case Intrinsic::riscv_vsoxseg4:
1761 case Intrinsic::riscv_vsoxseg5:
1762 case Intrinsic::riscv_vsoxseg6:
1763 case Intrinsic::riscv_vsoxseg7:
1764 case Intrinsic::riscv_vsoxseg8:
1765 case Intrinsic::riscv_vsuxseg2:
1766 case Intrinsic::riscv_vsuxseg3:
1767 case Intrinsic::riscv_vsuxseg4:
1768 case Intrinsic::riscv_vsuxseg5:
1769 case Intrinsic::riscv_vsuxseg6:
1770 case Intrinsic::riscv_vsuxseg7:
1771 case Intrinsic::riscv_vsuxseg8:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1773 /*IsStore*/ true,
1774 /*IsUnitStrided*/ false);
1775 case Intrinsic::riscv_vssseg2_mask:
1776 case Intrinsic::riscv_vssseg3_mask:
1777 case Intrinsic::riscv_vssseg4_mask:
1778 case Intrinsic::riscv_vssseg5_mask:
1779 case Intrinsic::riscv_vssseg6_mask:
1780 case Intrinsic::riscv_vssseg7_mask:
1781 case Intrinsic::riscv_vssseg8_mask:
1782 case Intrinsic::riscv_vsoxseg2_mask:
1783 case Intrinsic::riscv_vsoxseg3_mask:
1784 case Intrinsic::riscv_vsoxseg4_mask:
1785 case Intrinsic::riscv_vsoxseg5_mask:
1786 case Intrinsic::riscv_vsoxseg6_mask:
1787 case Intrinsic::riscv_vsoxseg7_mask:
1788 case Intrinsic::riscv_vsoxseg8_mask:
1789 case Intrinsic::riscv_vsuxseg2_mask:
1790 case Intrinsic::riscv_vsuxseg3_mask:
1791 case Intrinsic::riscv_vsuxseg4_mask:
1792 case Intrinsic::riscv_vsuxseg5_mask:
1793 case Intrinsic::riscv_vsuxseg6_mask:
1794 case Intrinsic::riscv_vsuxseg7_mask:
1795 case Intrinsic::riscv_vsuxseg8_mask:
1796 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1797 /*IsStore*/ true,
1798 /*IsUnitStrided*/ false);
1799 }
1800}
1801
1803 const AddrMode &AM, Type *Ty,
1804 unsigned AS,
1805 Instruction *I) const {
1806 // No global is ever allowed as a base.
1807 if (AM.BaseGV)
1808 return false;
1809
1810 // RVV instructions only support register addressing.
1811 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1812 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1813
1814 // Require a 12-bit signed offset.
1815 if (!isInt<12>(AM.BaseOffs))
1816 return false;
1817
1818 switch (AM.Scale) {
1819 case 0: // "r+i" or just "i", depending on HasBaseReg.
1820 break;
1821 case 1:
1822 if (!AM.HasBaseReg) // allow "r+i".
1823 break;
1824 return false; // disallow "r+r" or "r+r+i".
1825 default:
1826 return false;
1827 }
1828
1829 return true;
1830}
1831
1833 return isInt<12>(Imm);
1834}
1835
1837 return isInt<12>(Imm);
1838}
1839
1840// On RV32, 64-bit integers are split into their high and low parts and held
1841// in two different registers, so the trunc is free since the low register can
1842// just be used.
1843// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1844// isTruncateFree?
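// Illustrative example: on RV32 an i64 held in a register pair such as
// {a1, a0} is truncated to i32 simply by continuing to use a0; no instruction
// is emitted.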
1846 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1847 return false;
1848 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1849 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1850 return (SrcBits == 64 && DestBits == 32);
1851}
1852
1854 // We consider i64->i32 free on RV64 since we have good selection of W
1855 // instructions that make promoting operations back to i64 free in many cases.
1856 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1857 !DstVT.isInteger())
1858 return false;
1859 unsigned SrcBits = SrcVT.getSizeInBits();
1860 unsigned DestBits = DstVT.getSizeInBits();
1861 return (SrcBits == 64 && DestBits == 32);
1862}
1863
1865 // Zexts are free if they can be combined with a load.
1866 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1867 // poorly with type legalization of compares preferring sext.
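  // For example (illustrative), zero-extending an i8 load is free because the
  // load can be selected as a single lbu, which already zero-extends.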
1868 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1869 EVT MemVT = LD->getMemoryVT();
1870 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1871 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1872 LD->getExtensionType() == ISD::ZEXTLOAD))
1873 return true;
1874 }
1875
1876 return TargetLowering::isZExtFree(Val, VT2);
1877}
1878
1880 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1881}
1882
1884 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1885}
1886
1888 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1889}
1890
1892 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1893 Subtarget.hasVendorXCVbitmanip();
1894}
1895
1897 const Instruction &AndI) const {
1898 // We expect to be able to match a bit extraction instruction if the Zbs
1899 // extension is supported and the mask is a power of two. However, we
1900 // conservatively return false if the mask would fit in an ANDI instruction,
1901 // on the basis that it's possible the sinking+duplication of the AND in
1902 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1903 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1904 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1905 return false;
1906 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1907 if (!Mask)
1908 return false;
1909 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1910}
1911
1913 EVT VT = Y.getValueType();
1914
1915 // FIXME: Support vectors once we have tests.
1916 if (VT.isVector())
1917 return false;
1918
1919 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1920 !isa<ConstantSDNode>(Y);
1921}
1922
1924 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1925 if (Subtarget.hasStdExtZbs())
1926 return X.getValueType().isScalarInteger();
1927 auto *C = dyn_cast<ConstantSDNode>(Y);
1928 // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
1929 if (Subtarget.hasVendorXTHeadBs())
1930 return C != nullptr;
1931 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
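  // Illustrative example for bit position 3 (mask 0x8):
  //   andi a0, a0, 8
  //   snez a0, a0
  // Positions above 10 would push the mask 1 << pos past the positive range
  // of the 12-bit signed ANDI immediate.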
1932 return C && C->getAPIntValue().ule(10);
1933}
1934
1936 EVT VT) const {
1937 // Only enable for rvv.
1938 if (!VT.isVector() || !Subtarget.hasVInstructions())
1939 return false;
1940
1941 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1942 return false;
1943
1944 return true;
1945}
1946
1948 Type *Ty) const {
1949 assert(Ty->isIntegerTy());
1950
1951 unsigned BitSize = Ty->getIntegerBitWidth();
1952 if (BitSize > Subtarget.getXLen())
1953 return false;
1954
1955 // Fast path, assume 32-bit immediates are cheap.
1956 int64_t Val = Imm.getSExtValue();
1957 if (isInt<32>(Val))
1958 return true;
1959
1960 // A constant pool entry may be more aligned than the load we're trying to
1961 // replace. If we don't support unaligned scalar mem, prefer the constant
1962 // pool.
1963 // TODO: Can the caller pass down the alignment?
1964 if (!Subtarget.enableUnalignedScalarMem())
1965 return true;
1966
1967 // Prefer to keep the load if it would require many instructions.
1968 // This uses the same threshold we use for constant pools but doesn't
1969 // check useConstantPoolForLargeInts.
1970 // TODO: Should we keep the load only when we're definitely going to emit a
1971 // constant pool?
1972
1974 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1975}
1976
1980 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1981 SelectionDAG &DAG) const {
1982 // One interesting pattern that we'd want to form is 'bit extract':
1983 // ((1 >> Y) & 1) ==/!= 0
1984 // But we also need to be careful not to try to reverse that fold.
1985
1986 // Is this '((1 >> Y) & 1)'?
1987 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1988 return false; // Keep the 'bit extract' pattern.
1989
1990 // Will this be '((1 >> Y) & 1)' after the transform?
1991 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1992 return true; // Do form the 'bit extract' pattern.
1993
1994 // If 'X' is a constant, and we transform, then we will immediately
1995 // try to undo the fold, thus causing endless combine loop.
1996 // So only do the transform if X is not a constant. This matches the default
1997 // implementation of this function.
1998 return !XC;
1999}
2000
2001bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2002 switch (Opcode) {
2003 case Instruction::Add:
2004 case Instruction::Sub:
2005 case Instruction::Mul:
2006 case Instruction::And:
2007 case Instruction::Or:
2008 case Instruction::Xor:
2009 case Instruction::FAdd:
2010 case Instruction::FSub:
2011 case Instruction::FMul:
2012 case Instruction::FDiv:
2013 case Instruction::ICmp:
2014 case Instruction::FCmp:
2015 return true;
2016 case Instruction::Shl:
2017 case Instruction::LShr:
2018 case Instruction::AShr:
2019 case Instruction::UDiv:
2020 case Instruction::SDiv:
2021 case Instruction::URem:
2022 case Instruction::SRem:
2023 case Instruction::Select:
2024 return Operand == 1;
2025 default:
2026 return false;
2027 }
2028}
2029
2030
2032 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2033 return false;
2034
2035 if (canSplatOperand(I->getOpcode(), Operand))
2036 return true;
2037
2038 auto *II = dyn_cast<IntrinsicInst>(I);
2039 if (!II)
2040 return false;
2041
2042 switch (II->getIntrinsicID()) {
2043 case Intrinsic::fma:
2044 case Intrinsic::vp_fma:
2045 return Operand == 0 || Operand == 1;
2046 case Intrinsic::vp_shl:
2047 case Intrinsic::vp_lshr:
2048 case Intrinsic::vp_ashr:
2049 case Intrinsic::vp_udiv:
2050 case Intrinsic::vp_sdiv:
2051 case Intrinsic::vp_urem:
2052 case Intrinsic::vp_srem:
2053 case Intrinsic::ssub_sat:
2054 case Intrinsic::vp_ssub_sat:
2055 case Intrinsic::usub_sat:
2056 case Intrinsic::vp_usub_sat:
2057 return Operand == 1;
2058 // These intrinsics are commutative.
2059 case Intrinsic::vp_add:
2060 case Intrinsic::vp_mul:
2061 case Intrinsic::vp_and:
2062 case Intrinsic::vp_or:
2063 case Intrinsic::vp_xor:
2064 case Intrinsic::vp_fadd:
2065 case Intrinsic::vp_fmul:
2066 case Intrinsic::vp_icmp:
2067 case Intrinsic::vp_fcmp:
2068 case Intrinsic::smin:
2069 case Intrinsic::vp_smin:
2070 case Intrinsic::umin:
2071 case Intrinsic::vp_umin:
2072 case Intrinsic::smax:
2073 case Intrinsic::vp_smax:
2074 case Intrinsic::umax:
2075 case Intrinsic::vp_umax:
2076 case Intrinsic::sadd_sat:
2077 case Intrinsic::vp_sadd_sat:
2078 case Intrinsic::uadd_sat:
2079 case Intrinsic::vp_uadd_sat:
2080 // These intrinsics have 'vr' versions.
2081 case Intrinsic::vp_sub:
2082 case Intrinsic::vp_fsub:
2083 case Intrinsic::vp_fdiv:
2084 return Operand == 0 || Operand == 1;
2085 default:
2086 return false;
2087 }
2088}
2089
2090/// Check if sinking \p I's operands to I's basic block is profitable, because
2091/// the operands can be folded into a target instruction, e.g.
2092/// splats of scalars can fold into vector instructions.
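/// Illustrative example: a scalar %x splatted via insertelement+shufflevector
/// in one block and consumed by a vector add in another can, once the splat is
/// sunk next to its user, be selected as a single vadd.vx.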
2094 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2095 using namespace llvm::PatternMatch;
2096
2097 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2098 return false;
2099
2100 // Don't sink splat operands if the target prefers it. Some targets require
2101 // S2V transfer buffers and we can run out of them copying the same value
2102 // repeatedly.
2103 // FIXME: It could still be worth doing if it would improve vector register
2104 // pressure and prevent a vector spill.
2105 if (!Subtarget.sinkSplatOperands())
2106 return false;
2107
2108 for (auto OpIdx : enumerate(I->operands())) {
2109 if (!canSplatOperand(I, OpIdx.index()))
2110 continue;
2111
2112 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2113 // Make sure we are not already sinking this operand
2114 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2115 continue;
2116
2117 // We are looking for a splat that can be sunk.
2119 m_Undef(), m_ZeroMask())))
2120 continue;
2121
2122 // Don't sink i1 splats.
2123 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2124 continue;
2125
2126 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2127 // and vector registers.
2128 for (Use &U : Op->uses()) {
2129 Instruction *Insn = cast<Instruction>(U.getUser());
2130 if (!canSplatOperand(Insn, U.getOperandNo()))
2131 return false;
2132 }
2133
2134 Ops.push_back(&Op->getOperandUse(0));
2135 Ops.push_back(&OpIdx.value());
2136 }
2137 return true;
2138}
2139
2141 unsigned Opc = VecOp.getOpcode();
2142
2143 // Assume target opcodes can't be scalarized.
2144 // TODO - do we have any exceptions?
2145 if (Opc >= ISD::BUILTIN_OP_END)
2146 return false;
2147
2148 // If the vector op is not supported, try to convert to scalar.
2149 EVT VecVT = VecOp.getValueType();
2150 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2151 return true;
2152
2153 // If the vector op is supported, but the scalar op is not, the transform may
2154 // not be worthwhile.
2155 // Permit a vector binary operation to be converted to a scalar binary
2156 // operation which is custom lowered with an illegal type.
2157 EVT ScalarVT = VecVT.getScalarType();
2158 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2159 isOperationCustom(Opc, ScalarVT);
2160}
2161
2163 const GlobalAddressSDNode *GA) const {
2164 // In order to maximise the opportunity for common subexpression elimination,
2165 // keep a separate ADD node for the global address offset instead of folding
2166 // it in the global address node. Later peephole optimisations may choose to
2167 // fold it back in when profitable.
2168 return false;
2169}
2170
2171// Return one of the followings:
2172// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2173// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2174// positive counterpart, which will be materialized from the first returned
2175 // element. The second returned element indicates that an FNEG should
2176 // follow.
2177// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2178std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2179 EVT VT) const {
2180 if (!Subtarget.hasStdExtZfa())
2181 return std::make_pair(-1, false);
2182
2183 bool IsSupportedVT = false;
2184 if (VT == MVT::f16) {
2185 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2186 } else if (VT == MVT::f32) {
2187 IsSupportedVT = true;
2188 } else if (VT == MVT::f64) {
2189 assert(Subtarget.hasStdExtD() && "Expect D extension");
2190 IsSupportedVT = true;
2191 }
2192
2193 if (!IsSupportedVT)
2194 return std::make_pair(-1, false);
2195
2197 if (Index < 0 && Imm.isNegative())
2198 // Try the combination of its positive counterpart + FNEG.
2199 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2200 else
2201 return std::make_pair(Index, false);
2202}
2203
2205 bool ForCodeSize) const {
2206 bool IsLegalVT = false;
2207 if (VT == MVT::f16)
2208 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2209 else if (VT == MVT::f32)
2210 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2211 else if (VT == MVT::f64)
2212 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2213 else if (VT == MVT::bf16)
2214 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2215
2216 if (!IsLegalVT)
2217 return false;
2218
2219 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2220 return true;
2221
2222 // Cannot create a 64 bit floating-point immediate value for rv32.
2223 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2224 // td can handle +0.0 or -0.0 already.
2225 // -0.0 can be created by fmv + fneg.
2226 return Imm.isZero();
2227 }
2228
2229 // Special case: fmv + fneg
2230 if (Imm.isNegZero())
2231 return true;
2232
2233 // Building an integer and then converting requires a fmv at the end of
2234 // the integer sequence.
2235 const int Cost =
2236 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2237 Subtarget);
2238 return Cost <= FPImmCost;
2239}
2240
2241// TODO: This is very conservative.
2243 unsigned Index) const {
2245 return false;
2246
2247 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2248 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2249 return false;
2250
2251 EVT EltVT = ResVT.getVectorElementType();
2252 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2253
2254 // The smallest type we can slide is i8.
2255 // TODO: We can extract index 0 from a mask vector without a slide.
2256 if (EltVT == MVT::i1)
2257 return false;
2258
2259 unsigned ResElts = ResVT.getVectorNumElements();
2260 unsigned SrcElts = SrcVT.getVectorNumElements();
2261
2262 unsigned MinVLen = Subtarget.getRealMinVLen();
2263 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2264
2265 // If we're extracting only data from the first VLEN bits of the source
2266 // then we can always do this with an m1 vslidedown.vx. Restricting the
2267 // Index ensures we can use a vslidedown.vi.
2268 // TODO: We can generalize this when the exact VLEN is known.
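// For example, with VLEN >= 128 and i32 elements (MinVLMAX == 4), extracting
// a v2i32 at index 0 or 2 stays within the first register and needs at most a
// single m1 vslidedown.vi.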
2269 if (Index + ResElts <= MinVLMAX && Index < 31)
2270 return true;
2271
2272 // Conservatively only handle extracting half of a vector.
2273 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2274 // a cheap extract. However, this case is important in practice for
2275 // shuffled extracts of longer vectors. How should we resolve this?
2276 if ((ResElts * 2) != SrcElts)
2277 return false;
2278
2279 // A slide can support an arbitrary index, but we only treat vslidedown.vi as
2280 // cheap.
2281 if (Index >= 32)
2282 return false;
2283
2284 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2285 // the upper half of a vector until we have more test coverage.
2286 return Index == 0 || Index == ResElts;
2287}
2288
2291 EVT VT) const {
2292 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2293 // We might still end up using a GPR but that will be decided based on ABI.
2294 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2295 !Subtarget.hasStdExtZfhminOrZhinxmin())
2296 return MVT::f32;
2297
2299
2300 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2301 return MVT::i64;
2302
2303 return PartVT;
2304}
2305
2308 EVT VT) const {
2309 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2310 // We might still end up using a GPR but that will be decided based on ABI.
2311 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2312 !Subtarget.hasStdExtZfhminOrZhinxmin())
2313 return 1;
2314
2316}
2317
2319 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2320 unsigned &NumIntermediates, MVT &RegisterVT) const {
2322 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2323
2324 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2325 IntermediateVT = MVT::i64;
2326
2327 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2328 RegisterVT = MVT::i64;
2329
2330 return NumRegs;
2331}
2332
2333// Changes the condition code and swaps operands if necessary, so the SetCC
2334// operation matches one of the comparisons supported directly by branches
2335// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2336// with 1/-1.
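// For example, (setlt X, 1) becomes (setge 0, X), (setgt X, -1) becomes
// (setge X, 0), and on RV64 a single-bit test such as (setne (and X, 1<<20), 0)
// is rewritten to shift bit 20 into the sign bit and do a signed compare of the
// result against zero.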
2337static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2338 ISD::CondCode &CC, SelectionDAG &DAG) {
2339 // If this is a single bit test that can't be handled by ANDI, shift the
2340 // bit to be tested to the MSB and perform a signed compare with 0.
2341 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2342 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2343 isa<ConstantSDNode>(LHS.getOperand(1))) {
2344 uint64_t Mask = LHS.getConstantOperandVal(1);
2345 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2346 unsigned ShAmt = 0;
2347 if (isPowerOf2_64(Mask)) {
2348       CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2349       ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2350 } else {
2351 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2352 }
2353
2354 LHS = LHS.getOperand(0);
2355 if (ShAmt != 0)
2356 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2357 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2358 return;
2359 }
2360 }
2361
2362 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2363 int64_t C = RHSC->getSExtValue();
2364 switch (CC) {
2365 default: break;
2366 case ISD::SETGT:
2367 // Convert X > -1 to X >= 0.
2368 if (C == -1) {
2369 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2370 CC = ISD::SETGE;
2371 return;
2372 }
2373 break;
2374 case ISD::SETLT:
2375 // Convert X < 1 to 0 >= X.
2376 if (C == 1) {
2377 RHS = LHS;
2378 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2379 CC = ISD::SETGE;
2380 return;
2381 }
2382 break;
2383 }
2384 }
2385
2386 switch (CC) {
2387 default:
2388 break;
2389 case ISD::SETGT:
2390 case ISD::SETLE:
2391 case ISD::SETUGT:
2392 case ISD::SETULE:
2393     CC = ISD::getSetCCSwappedOperands(CC);
2394     std::swap(LHS, RHS);
2395 break;
2396 }
2397}
2398
2400 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2401 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2402 if (VT.getVectorElementType() == MVT::i1)
2403 KnownSize *= 8;
2404
2405 switch (KnownSize) {
2406 default:
2407 llvm_unreachable("Invalid LMUL.");
2408 case 8:
2410 case 16:
2412 case 32:
2414 case 64:
2416 case 128:
2418 case 256:
2420 case 512:
2422 }
2423}
2424
2426 switch (LMul) {
2427 default:
2428 llvm_unreachable("Invalid LMUL.");
2433 return RISCV::VRRegClassID;
2435 return RISCV::VRM2RegClassID;
2437 return RISCV::VRM4RegClassID;
2439 return RISCV::VRM8RegClassID;
2440 }
2441}
2442
2444 RISCVII::VLMUL LMUL = getLMUL(VT);
2445 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2446 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2447 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2448 LMUL == RISCVII::VLMUL::LMUL_1) {
2449 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2450 "Unexpected subreg numbering");
2451 return RISCV::sub_vrm1_0 + Index;
2452 }
2453 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2454 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2455 "Unexpected subreg numbering");
2456 return RISCV::sub_vrm2_0 + Index;
2457 }
2458 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2459 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2460 "Unexpected subreg numbering");
2461 return RISCV::sub_vrm4_0 + Index;
2462 }
2463 llvm_unreachable("Invalid vector type.");
2464}
2465
2467 if (VT.getVectorElementType() == MVT::i1)
2468 return RISCV::VRRegClassID;
2469 return getRegClassIDForLMUL(getLMUL(VT));
2470}
2471
2472// Attempt to decompose a subvector insert/extract between VecVT and
2473// SubVecVT via subregister indices. Returns the subregister index that
2474// can perform the subvector insert/extract with the given element index, as
2475// well as the index corresponding to any leftover subvectors that must be
2476// further inserted/extracted within the register class for SubVecVT.
2477std::pair<unsigned, unsigned>
2479 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2480 const RISCVRegisterInfo *TRI) {
2481 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2482 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2483 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2484 "Register classes not ordered");
2485 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2486 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2487 // Try to compose a subregister index that takes us from the incoming
2488 // LMUL>1 register class down to the outgoing one. At each step we halve
2489 // the LMUL:
2490 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2491 // Note that this is not guaranteed to find a subregister index, such as
2492 // when we are extracting from one VR type to another.
2493 unsigned SubRegIdx = RISCV::NoSubRegister;
2494 for (const unsigned RCID :
2495 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2496 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2497 VecVT = VecVT.getHalfNumVectorElementsVT();
2498 bool IsHi =
2499 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2500 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2501 getSubregIndexByMVT(VecVT, IsHi));
2502 if (IsHi)
2503 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2504 }
2505 return {SubRegIdx, InsertExtractIdx};
2506}
2507
2508// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2509// stores for those types.
2510bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2511 return !Subtarget.useRVVForFixedLengthVectors() ||
2512 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2513}
2514
2516 if (!ScalarTy.isSimple())
2517 return false;
2518 switch (ScalarTy.getSimpleVT().SimpleTy) {
2519 case MVT::iPTR:
2520 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2521 case MVT::i8:
2522 case MVT::i16:
2523 case MVT::i32:
2524 return true;
2525 case MVT::i64:
2526 return Subtarget.hasVInstructionsI64();
2527 case MVT::f16:
2528 return Subtarget.hasVInstructionsF16();
2529 case MVT::f32:
2530 return Subtarget.hasVInstructionsF32();
2531 case MVT::f64:
2532 return Subtarget.hasVInstructionsF64();
2533 default:
2534 return false;
2535 }
2536}
2537
2538
2539unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2540 return NumRepeatedDivisors;
2541}
2542
2544 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2545 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2546 "Unexpected opcode");
2547 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2548 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2550 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2551 if (!II)
2552 return SDValue();
2553 return Op.getOperand(II->VLOperand + 1 + HasChain);
2554}
2555
2557 const RISCVSubtarget &Subtarget) {
2558 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2559 if (!Subtarget.useRVVForFixedLengthVectors())
2560 return false;
2561
2562 // We only support a set of vector types with a consistent maximum fixed size
2563 // across all supported vector element types to avoid legalization issues.
2564 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2565 // fixed-length vector type we support is 1024 bytes.
2566 if (VT.getFixedSizeInBits() > 1024 * 8)
2567 return false;
2568
2569 unsigned MinVLen = Subtarget.getRealMinVLen();
2570
2571 MVT EltVT = VT.getVectorElementType();
2572
2573 // Don't use RVV for vectors we cannot scalarize if required.
2574 switch (EltVT.SimpleTy) {
2575 // i1 is supported but has different rules.
2576 default:
2577 return false;
2578 case MVT::i1:
2579 // Masks can only use a single register.
2580 if (VT.getVectorNumElements() > MinVLen)
2581 return false;
2582 MinVLen /= 8;
2583 break;
2584 case MVT::i8:
2585 case MVT::i16:
2586 case MVT::i32:
2587 break;
2588 case MVT::i64:
2589 if (!Subtarget.hasVInstructionsI64())
2590 return false;
2591 break;
2592 case MVT::f16:
2593 if (!Subtarget.hasVInstructionsF16Minimal())
2594 return false;
2595 break;
2596 case MVT::bf16:
2597 if (!Subtarget.hasVInstructionsBF16())
2598 return false;
2599 break;
2600 case MVT::f32:
2601 if (!Subtarget.hasVInstructionsF32())
2602 return false;
2603 break;
2604 case MVT::f64:
2605 if (!Subtarget.hasVInstructionsF64())
2606 return false;
2607 break;
2608 }
2609
2610 // Reject elements larger than ELEN.
2611 if (EltVT.getSizeInBits() > Subtarget.getELen())
2612 return false;
2613
2614 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2615 // Don't use RVV for types that don't fit.
2616 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2617 return false;
2618
2619 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2620 // the base fixed length RVV support in place.
2621 if (!VT.isPow2VectorType())
2622 return false;
2623
2624 return true;
2625}
2626
2627bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2628 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2629}
2630
2631// Return the largest legal scalable vector type that matches VT's element type.
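// For example, with VLEN >= 128, a fixed-length v4i32 is operated on in an
// nxv2i32 container (LMUL=1), while v16i32 needs an nxv8i32 container (LMUL=4).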
2633 const RISCVSubtarget &Subtarget) {
2634 // This may be called before legal types are setup.
2635 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2636 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2637 "Expected legal fixed length vector!");
2638
2639 unsigned MinVLen = Subtarget.getRealMinVLen();
2640 unsigned MaxELen = Subtarget.getELen();
2641
2642 MVT EltVT = VT.getVectorElementType();
2643 switch (EltVT.SimpleTy) {
2644 default:
2645 llvm_unreachable("unexpected element type for RVV container");
2646 case MVT::i1:
2647 case MVT::i8:
2648 case MVT::i16:
2649 case MVT::i32:
2650 case MVT::i64:
2651 case MVT::bf16:
2652 case MVT::f16:
2653 case MVT::f32:
2654 case MVT::f64: {
2655 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2656 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2657 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2658 unsigned NumElts =
2660 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2661 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2662 return MVT::getScalableVectorVT(EltVT, NumElts);
2663 }
2664 }
2665}
2666
2668 const RISCVSubtarget &Subtarget) {
2670 Subtarget);
2671}
2672
2674 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2675}
2676
2677// Grow V to consume an entire RVV register.
2679 const RISCVSubtarget &Subtarget) {
2680 assert(VT.isScalableVector() &&
2681 "Expected to convert into a scalable vector!");
2682 assert(V.getValueType().isFixedLengthVector() &&
2683 "Expected a fixed length vector operand!");
2684 SDLoc DL(V);
2685 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2686 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2687}
2688
2689// Shrink V so it's just big enough to maintain a VT's worth of data.
2691 const RISCVSubtarget &Subtarget) {
2693 "Expected to convert into a fixed length vector!");
2694 assert(V.getValueType().isScalableVector() &&
2695 "Expected a scalable vector operand!");
2696 SDLoc DL(V);
2697 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2698 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2699}
2700
2701 /// Return the mask type suitable for masking the provided
2702/// vector type. This is simply an i1 element type vector of the same
2703/// (possibly scalable) length.
2704static MVT getMaskTypeFor(MVT VecVT) {
2705 assert(VecVT.isVector());
2707 return MVT::getVectorVT(MVT::i1, EC);
2708}
2709
2710/// Creates an all ones mask suitable for masking a vector of type VecTy with
2711 /// vector length VL.
2712static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2713 SelectionDAG &DAG) {
2714 MVT MaskVT = getMaskTypeFor(VecVT);
2715 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2716}
2717
2718static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2719 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2720 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2721 // canonicalize the representation. InsertVSETVLI will pick the immediate
2722 // encoding later if profitable.
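  // For example, if the exact VLEN is known to be 128 and ContainerVT is
  // nxv2i32 (VLMAX == 4), a request for VL == 4 is emitted as X0 (the VLMAX
  // encoding) rather than the constant 4.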
2723 const auto [MinVLMAX, MaxVLMAX] =
2724 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2725 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2726 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2727
2728 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2729}
2730
2731static std::pair<SDValue, SDValue>
2733 const RISCVSubtarget &Subtarget) {
2734 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2735 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2736 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2737 return {Mask, VL};
2738}
2739
2740static std::pair<SDValue, SDValue>
2741getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2742 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2743 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2744 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2745 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2746 return {Mask, VL};
2747}
2748
2749// Gets the two common "VL" operands: an all-ones mask and the vector length.
2750 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2751 // fixed-length, ContainerVT is the scalable container type it is lowered in;
2752 // otherwise VecVT is scalable and ContainerVT should be the same as VecVT.
2753static std::pair<SDValue, SDValue>
2754getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2755 const RISCVSubtarget &Subtarget) {
2756 if (VecVT.isFixedLengthVector())
2757 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2758 Subtarget);
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2761}
2762
2764 SelectionDAG &DAG) const {
2765 assert(VecVT.isScalableVector() && "Expected scalable vector");
2766 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2767 VecVT.getVectorElementCount());
2768}
2769
2770std::pair<unsigned, unsigned>
2772 const RISCVSubtarget &Subtarget) {
2773 assert(VecVT.isScalableVector() && "Expected scalable vector");
2774
2775 unsigned EltSize = VecVT.getScalarSizeInBits();
2776 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2777
2778 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2779 unsigned MaxVLMAX =
2780 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2781
2782 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2783 unsigned MinVLMAX =
2784 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2785
2786 return std::make_pair(MinVLMAX, MaxVLMAX);
2787}
2788
2789 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very little
2790// of either is (currently) supported. This can get us into an infinite loop
2791// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2792// as a ..., etc.
2793// Until either (or both) of these can reliably lower any node, reporting that
2794// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2795// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2796// which is not desirable.
2798 EVT VT, unsigned DefinedValues) const {
2799 return false;
2800}
2801
2803   // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2804   // implementation-defined.
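  // For example, with DLEN == VLEN/2 (DLenFactor == 2) an LMUL=4 type is
  // modeled with cost 8, while a fractional LMUL=1/2 type is modeled with
  // cost 1.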
2805 if (!VT.isVector())
2807 unsigned DLenFactor = Subtarget.getDLenFactor();
2808 unsigned Cost;
2809 if (VT.isScalableVector()) {
2810 unsigned LMul;
2811 bool Fractional;
2812 std::tie(LMul, Fractional) =
2814 if (Fractional)
2815 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2816 else
2817 Cost = (LMul * DLenFactor);
2818 } else {
2819 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2820 }
2821 return Cost;
2822}
2823
2824
2825 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2826 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2827 /// the operands (index and possibly mask) are handled separately.
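/// For example, with DLEN == VLEN, an LMUL=2 vrgather.vv is modeled as
/// 2 * 2 = 4 times the cost of an LMUL=1 operation.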
2829 return getLMULCost(VT) * getLMULCost(VT);
2830}
2831
2832/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2833/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2834/// or may track the vrgather.vv cost. It is implementation-dependent.
2836 return getLMULCost(VT);
2837}
2838
2839/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2840/// for the type VT. (This does not cover the vslide1up or vslide1down
2841/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2842/// or may track the vrgather.vv cost. It is implementation-dependent.
2844 return getLMULCost(VT);
2845}
2846
2847/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2848/// for the type VT. (This does not cover the vslide1up or vslide1down
2849/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2856 const RISCVSubtarget &Subtarget) {
2857 // RISC-V FP-to-int conversions saturate to the destination register size, but
2858 // don't produce 0 for nan. We can use a conversion instruction and fix the
2859 // nan case with a compare and a select.
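  // For example, fcvt.w.s already clamps out-of-range inputs to the saturation
  // bounds; the extra compare-and-select only forces a NaN input to yield 0, as
  // the FP_TO_*INT_SAT semantics require.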
2860 SDValue Src = Op.getOperand(0);
2861
2862 MVT DstVT = Op.getSimpleValueType();
2863 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2864
2865 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2866
2867 if (!DstVT.isVector()) {
2868     // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2869 // the result.
2870 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2871 Src.getValueType() == MVT::bf16) {
2872 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2873 }
2874
2875 unsigned Opc;
2876 if (SatVT == DstVT)
2877 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2878 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2880 else
2881 return SDValue();
2882 // FIXME: Support other SatVTs by clamping before or after the conversion.
2883
2884 SDLoc DL(Op);
2885 SDValue FpToInt = DAG.getNode(
2886 Opc, DL, DstVT, Src,
2888
2889 if (Opc == RISCVISD::FCVT_WU_RV64)
2890 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2891
2892 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2893 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2895 }
2896
2897 // Vectors.
2898
2899 MVT DstEltVT = DstVT.getVectorElementType();
2900 MVT SrcVT = Src.getSimpleValueType();
2901 MVT SrcEltVT = SrcVT.getVectorElementType();
2902 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2903 unsigned DstEltSize = DstEltVT.getSizeInBits();
2904
2905 // Only handle saturating to the destination type.
2906 if (SatVT != DstEltVT)
2907 return SDValue();
2908
2909   // FIXME: Don't support narrowing by more than 1 step for now.
2910 if (SrcEltSize > (2 * DstEltSize))
2911 return SDValue();
2912
2913 MVT DstContainerVT = DstVT;
2914 MVT SrcContainerVT = SrcVT;
2915 if (DstVT.isFixedLengthVector()) {
2916 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2917 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2918 assert(DstContainerVT.getVectorElementCount() ==
2919 SrcContainerVT.getVectorElementCount() &&
2920 "Expected same element count");
2921 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2922 }
2923
2924 SDLoc DL(Op);
2925
2926 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2927
2928 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2929 {Src, Src, DAG.getCondCode(ISD::SETNE),
2930 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2931
2932 // Need to widen by more than 1 step, promote the FP type, then do a widening
2933 // convert.
2934 if (DstEltSize > (2 * SrcEltSize)) {
2935 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2936 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2937 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2938 }
2939
2940 unsigned RVVOpc =
2942 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2943
2944 SDValue SplatZero = DAG.getNode(
2945 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2946 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2947 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2948 Res, DAG.getUNDEF(DstContainerVT), VL);
2949
2950 if (DstVT.isFixedLengthVector())
2951 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2952
2953 return Res;
2954}
2955
2957 switch (Opc) {
2958 case ISD::FROUNDEVEN:
2959   case ISD::STRICT_FROUNDEVEN:
2960   case ISD::VP_FROUNDEVEN:
2961 return RISCVFPRndMode::RNE;
2962 case ISD::FTRUNC:
2963 case ISD::STRICT_FTRUNC:
2964 case ISD::VP_FROUNDTOZERO:
2965 return RISCVFPRndMode::RTZ;
2966 case ISD::FFLOOR:
2967 case ISD::STRICT_FFLOOR:
2968 case ISD::VP_FFLOOR:
2969 return RISCVFPRndMode::RDN;
2970 case ISD::FCEIL:
2971 case ISD::STRICT_FCEIL:
2972 case ISD::VP_FCEIL:
2973 return RISCVFPRndMode::RUP;
2974 case ISD::FROUND:
2975 case ISD::STRICT_FROUND:
2976 case ISD::VP_FROUND:
2977 return RISCVFPRndMode::RMM;
2978 case ISD::FRINT:
2979 return RISCVFPRndMode::DYN;
2980 }
2981
2983}
2984
2985// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2986// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2987 // the integer domain and back, taking care to avoid converting values that are
2988 // NaN or already correct.
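// For f32, for example, the magnitude threshold is 2^23: lanes with
// |x| >= 2^23 (and NaN lanes) have no fractional bits and are masked out of
// the integer round-trip so they pass through unchanged.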
2989static SDValue
2991 const RISCVSubtarget &Subtarget) {
2992 MVT VT = Op.getSimpleValueType();
2993 assert(VT.isVector() && "Unexpected type");
2994
2995 SDLoc DL(Op);
2996
2997 SDValue Src = Op.getOperand(0);
2998
2999 MVT ContainerVT = VT;
3000 if (VT.isFixedLengthVector()) {
3001 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3002 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3003 }
3004
3005 SDValue Mask, VL;
3006 if (Op->isVPOpcode()) {
3007 Mask = Op.getOperand(1);
3008 if (VT.isFixedLengthVector())
3009 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3010 Subtarget);
3011 VL = Op.getOperand(2);
3012 } else {
3013 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3014 }
3015
3016 // Freeze the source since we are increasing the number of uses.
3017 Src = DAG.getFreeze(Src);
3018
3019 // We do the conversion on the absolute value and fix the sign at the end.
3020 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3021
3022 // Determine the largest integer that can be represented exactly. This and
3023 // values larger than it don't have any fractional bits so don't need to
3024 // be converted.
3025 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3026 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3027 APFloat MaxVal = APFloat(FltSem);
3028 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3029 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3030 SDValue MaxValNode =
3031 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3032 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3033 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3034
3035 // If abs(Src) was larger than MaxVal or nan, keep it.
3036 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3037 Mask =
3038 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3039 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3040 Mask, Mask, VL});
3041
3042 // Truncate to integer and convert back to FP.
3043 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3044 MVT XLenVT = Subtarget.getXLenVT();
3045 SDValue Truncated;
3046
3047 switch (Op.getOpcode()) {
3048 default:
3049 llvm_unreachable("Unexpected opcode");
3050 case ISD::FCEIL:
3051 case ISD::VP_FCEIL:
3052 case ISD::FFLOOR:
3053 case ISD::VP_FFLOOR:
3054 case ISD::FROUND:
3055 case ISD::FROUNDEVEN:
3056 case ISD::VP_FROUND:
3057 case ISD::VP_FROUNDEVEN:
3058 case ISD::VP_FROUNDTOZERO: {
3061 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3062 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3063 break;
3064 }
3065 case ISD::FTRUNC:
3066 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3067 Mask, VL);
3068 break;
3069 case ISD::FRINT:
3070 case ISD::VP_FRINT:
3071 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3072 break;
3073 case ISD::FNEARBYINT:
3074 case ISD::VP_FNEARBYINT:
3075 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3076 Mask, VL);
3077 break;
3078 }
3079
3080 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3081 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3082 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3083 Mask, VL);
3084
3085 // Restore the original sign so that -0.0 is preserved.
3086 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3087 Src, Src, Mask, VL);
3088
3089 if (!VT.isFixedLengthVector())
3090 return Truncated;
3091
3092 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3093}
3094
3095// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3096 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3097 // to a qNaN and converting the new source to integer and back to FP.
3098static SDValue
3100 const RISCVSubtarget &Subtarget) {
3101 SDLoc DL(Op);
3102 MVT VT = Op.getSimpleValueType();
3103 SDValue Chain = Op.getOperand(0);
3104 SDValue Src = Op.getOperand(1);
3105
3106 MVT ContainerVT = VT;
3107 if (VT.isFixedLengthVector()) {
3108 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3109 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3110 }
3111
3112 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3113
3114 // Freeze the source since we are increasing the number of uses.
3115 Src = DAG.getFreeze(Src);
3116
3117   // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3118 MVT MaskVT = Mask.getSimpleValueType();
3120 DAG.getVTList(MaskVT, MVT::Other),
3121 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3122 DAG.getUNDEF(MaskVT), Mask, VL});
3123 Chain = Unorder.getValue(1);
3125 DAG.getVTList(ContainerVT, MVT::Other),
3126 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3127 Chain = Src.getValue(1);
3128
3129 // We do the conversion on the absolute value and fix the sign at the end.
3130 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3131
3132 // Determine the largest integer that can be represented exactly. This and
3133 // values larger than it don't have any fractional bits so don't need to
3134 // be converted.
3135 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3136 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3137 APFloat MaxVal = APFloat(FltSem);
3138 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3139 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3140 SDValue MaxValNode =
3141 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3142 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3143 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3144
3145 // If abs(Src) was larger than MaxVal or nan, keep it.
3146 Mask = DAG.getNode(
3147 RISCVISD::SETCC_VL, DL, MaskVT,
3148 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3149
3150 // Truncate to integer and convert back to FP.
3151 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3152 MVT XLenVT = Subtarget.getXLenVT();
3153 SDValue Truncated;
3154
3155 switch (Op.getOpcode()) {
3156 default:
3157 llvm_unreachable("Unexpected opcode");
3158 case ISD::STRICT_FCEIL:
3159 case ISD::STRICT_FFLOOR:
3160 case ISD::STRICT_FROUND:
3164 Truncated = DAG.getNode(
3165 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3166 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3167 break;
3168 }
3169 case ISD::STRICT_FTRUNC:
3170 Truncated =
3172 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3173 break;
3176 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3177 Mask, VL);
3178 break;
3179 }
3180 Chain = Truncated.getValue(1);
3181
3182 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3183 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3184 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3185 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3186 Truncated, Mask, VL);
3187 Chain = Truncated.getValue(1);
3188 }
3189
3190 // Restore the original sign so that -0.0 is preserved.
3191 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3192 Src, Src, Mask, VL);
3193
3194 if (VT.isFixedLengthVector())
3195 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3196 return DAG.getMergeValues({Truncated, Chain}, DL);
3197}
3198
3199static SDValue
3201 const RISCVSubtarget &Subtarget) {
3202 MVT VT = Op.getSimpleValueType();
3203 if (VT.isVector())
3204 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3205
3206 if (DAG.shouldOptForSize())
3207 return SDValue();
3208
3209 SDLoc DL(Op);
3210 SDValue Src = Op.getOperand(0);
3211
3212 // Create an integer the size of the mantissa with the MSB set. This and all
3213 // values larger than it don't have any fractional bits so don't need to be
3214 // converted.
3215 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3216 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3217 APFloat MaxVal = APFloat(FltSem);
3218 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3219 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3220 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3221
3223 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3224 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3225}
3226
3227// Expand vector LRINT and LLRINT by converting to the integer domain.
3229 const RISCVSubtarget &Subtarget) {
3230 MVT VT = Op.getSimpleValueType();
3231 assert(VT.isVector() && "Unexpected type");
3232
3233 SDLoc DL(Op);
3234 SDValue Src = Op.getOperand(0);
3235 MVT ContainerVT = VT;
3236
3237 if (VT.isFixedLengthVector()) {
3238 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3239 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3240 }
3241
3242 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3243 SDValue Truncated =
3244 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3245
3246 if (!VT.isFixedLengthVector())
3247 return Truncated;
3248
3249 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3250}
3251
3252static SDValue
3254 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3255 SDValue Offset, SDValue Mask, SDValue VL,
3257 if (Merge.isUndef())
3259 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3260 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3261 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3262}
3263
3264static SDValue
3265getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3267 SDValue VL,
3269 if (Merge.isUndef())
3271 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3272 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3273 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3274}
3275
3276static MVT getLMUL1VT(MVT VT) {
3278 "Unexpected vector MVT");
3282}
3283
3284 struct VIDSequence {
3285   int64_t StepNumerator;
3286   unsigned StepDenominator;
3287   int64_t Addend;
3288};
3289
3290static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3292 // We will use a SINT_TO_FP to materialize this constant so we should use a
3293 // signed APSInt here.
3294 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3295 // We use an arbitrary rounding mode here. If a floating-point is an exact
3296 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3297 // the rounding mode changes the output value, then it is not an exact
3298 // integer.
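  // For example, 3.0 converts exactly and yields 3, whereas 0.5 (inexact) or a
  // value outside the signed BitWidth range returns std::nullopt.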
3300 bool IsExact;
3301 // If it is out of signed integer range, it will return an invalid operation.
3302 // If it is not an exact integer, IsExact is false.
3303 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3305 !IsExact)
3306 return std::nullopt;
3307 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3308}
3309
3310// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3311// to the (non-zero) step S and start value X. This can be then lowered as the
3312// RVV sequence (VID * S) + X, for example.
3313// The step S is represented as an integer numerator divided by a positive
3314// denominator. Note that the implementation currently only identifies
3315// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3316// cannot detect 2/3, for example.
3317// Note that this method will also match potentially unappealing index
3318// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3319// determine whether this is worth generating code for.
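// For example, <0, 2, 4, 6> is matched as {StepNumerator=2, StepDenominator=1,
// Addend=0}, and <1, 1, 2, 2> as {StepNumerator=1, StepDenominator=2, Addend=1}.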
3320static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3321 unsigned EltSizeInBits) {
3322 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3323 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3324 return std::nullopt;
3325 bool IsInteger = Op.getValueType().isInteger();
3326
3327 std::optional<unsigned> SeqStepDenom;
3328 std::optional<int64_t> SeqStepNum, SeqAddend;
3329 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3330 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3331
3332 // First extract the ops into a list of constant integer values. This may not
3333 // be possible for floats if they're not all representable as integers.
3335 const unsigned OpSize = Op.getScalarValueSizeInBits();
3336 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3337 if (Elt.isUndef()) {
3338 Elts[Idx] = std::nullopt;
3339 continue;
3340 }
3341 if (IsInteger) {
3342 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3343 } else {
3344 auto ExactInteger =
3345 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3346 if (!ExactInteger)
3347 return std::nullopt;
3348 Elts[Idx] = *ExactInteger;
3349 }
3350 }
3351
3352 for (auto [Idx, Elt] : enumerate(Elts)) {
3353 // Assume undef elements match the sequence; we just have to be careful
3354 // when interpolating across them.
3355 if (!Elt)
3356 continue;
3357
3358 if (PrevElt) {
3359 // Calculate the step since the last non-undef element, and ensure
3360 // it's consistent across the entire sequence.
3361 unsigned IdxDiff = Idx - PrevElt->second;
3362 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3363
3364       // A zero value difference means that we're somewhere in the middle
3365 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3366 // step change before evaluating the sequence.
3367 if (ValDiff == 0)
3368 continue;
3369
3370 int64_t Remainder = ValDiff % IdxDiff;
3371 // Normalize the step if it's greater than 1.
3372 if (Remainder != ValDiff) {
3373 // The difference must cleanly divide the element span.
3374 if (Remainder != 0)
3375 return std::nullopt;
3376 ValDiff /= IdxDiff;
3377 IdxDiff = 1;
3378 }
3379
3380 if (!SeqStepNum)
3381 SeqStepNum = ValDiff;
3382 else if (ValDiff != SeqStepNum)
3383 return std::nullopt;
3384
3385 if (!SeqStepDenom)
3386 SeqStepDenom = IdxDiff;
3387 else if (IdxDiff != *SeqStepDenom)
3388 return std::nullopt;
3389 }
3390
3391 // Record this non-undef element for later.
3392 if (!PrevElt || PrevElt->first != *Elt)
3393 PrevElt = std::make_pair(*Elt, Idx);
3394 }
3395
3396 // We need to have logged a step for this to count as a legal index sequence.
3397 if (!SeqStepNum || !SeqStepDenom)
3398 return std::nullopt;
3399
3400 // Loop back through the sequence and validate elements we might have skipped
3401 // while waiting for a valid step. While doing this, log any sequence addend.
3402 for (auto [Idx, Elt] : enumerate(Elts)) {
3403 if (!Elt)
3404 continue;
3405 uint64_t ExpectedVal =
3406 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3407 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3408 if (!SeqAddend)
3409 SeqAddend = Addend;
3410 else if (Addend != SeqAddend)
3411 return std::nullopt;
3412 }
3413
3414 assert(SeqAddend && "Must have an addend if we have a step");
3415
3416 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3417}
3418
3419// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3420// and lower it as a VRGATHER_VX_VL from the source vector.
3421static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3422 SelectionDAG &DAG,
3423 const RISCVSubtarget &Subtarget) {
3424 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3425 return SDValue();
3426 SDValue Vec = SplatVal.getOperand(0);
3427 // Only perform this optimization on vectors of the same size for simplicity.
3428 // Don't perform this optimization for i1 vectors.
3429 // FIXME: Support i1 vectors, maybe by promoting to i8?
3430 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3431 return SDValue();
3432 SDValue Idx = SplatVal.getOperand(1);
3433 // The index must be a legal type.
3434 if (Idx.getValueType() != Subtarget.getXLenVT())
3435 return SDValue();
3436
3437 MVT ContainerVT = VT;
3438 if (VT.isFixedLengthVector()) {
3439 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3440 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3441 }
3442
3443 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3444
3445 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3446 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3447
3448 if (!VT.isFixedLengthVector())
3449 return Gather;
3450
3451 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3452}
3453
3454
3455/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3456/// which constitute a large proportion of the elements. In such cases we can
3457/// splat a vector with the dominant element and make up the shortfall with
3458 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3459/// Note that this includes vectors of 2 elements by association. The
3460/// upper-most element is the "dominant" one, allowing us to use a splat to
3461/// "insert" the upper element, and an insert of the lower element at position
3462/// 0, which improves codegen.
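/// For example, <2, 2, 7, 2> can be lowered as a splat of 2 followed by a
/// single INSERT_VECTOR_ELT of 7 at index 2.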
3464 const RISCVSubtarget &Subtarget) {
3465 MVT VT = Op.getSimpleValueType();
3466 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3467
3468 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3469
3470 SDLoc DL(Op);
3471 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3472
3473 MVT XLenVT = Subtarget.getXLenVT();
3474 unsigned NumElts = Op.getNumOperands();
3475
3476 SDValue DominantValue;
3477 unsigned MostCommonCount = 0;
3478 DenseMap<SDValue, unsigned> ValueCounts;
3479 unsigned NumUndefElts =
3480 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3481
3482 // Track the number of scalar loads we know we'd be inserting, estimated as
3483 // any non-zero floating-point constant. Other kinds of element are either
3484 // already in registers or are materialized on demand. The threshold at which
3485   // a vector load is more desirable than several scalar materializations and
3486 // vector-insertion instructions is not known.
3487 unsigned NumScalarLoads = 0;
3488
3489 for (SDValue V : Op->op_values()) {
3490 if (V.isUndef())
3491 continue;
3492
3493 ValueCounts.insert(std::make_pair(V, 0));
3494 unsigned &Count = ValueCounts[V];
3495 if (0 == Count)
3496 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3497 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3498
3499 // Is this value dominant? In case of a tie, prefer the highest element as
3500 // it's cheaper to insert near the beginning of a vector than it is at the
3501 // end.
3502 if (++Count >= MostCommonCount) {
3503 DominantValue = V;
3504 MostCommonCount = Count;
3505 }
3506 }
3507
3508 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3509 unsigned NumDefElts = NumElts - NumUndefElts;
3510 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3511
3512 // Don't perform this optimization when optimizing for size, since
3513 // materializing elements and inserting them tends to cause code bloat.
3514 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3515 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3516 ((MostCommonCount > DominantValueCountThreshold) ||
3517 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3518 // Start by splatting the most common element.
3519 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3520
3521 DenseSet<SDValue> Processed{DominantValue};
3522
3523 // We can handle an insert into the last element (of a splat) via
3524 // v(f)slide1down. This is slightly better than the vslideup insert
3525 // lowering as it avoids the need for a vector group temporary. It
3526 // is also better than using vmerge.vx as it avoids the need to
3527 // materialize the mask in a vector register.
3528 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3529 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3530 LastOp != DominantValue) {
3531 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3532 auto OpCode =
3534 if (!VT.isFloatingPoint())
3535 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3536 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3537 LastOp, Mask, VL);
3538 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3539 Processed.insert(LastOp);
3540 }
3541
3542 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3543 for (const auto &OpIdx : enumerate(Op->ops())) {
3544 const SDValue &V = OpIdx.value();
3545 if (V.isUndef() || !Processed.insert(V).second)
3546 continue;
3547 if (ValueCounts[V] == 1) {
3548 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3549 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3550 } else {
3551 // Blend in all instances of this value using a VSELECT, using a
3552 // mask where each bit signals whether that element is the one
3553 // we're after.
3555 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3556 return DAG.getConstant(V == V1, DL, XLenVT);
3557 });
3558 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3559 DAG.getBuildVector(SelMaskTy, DL, Ops),
3560 DAG.getSplatBuildVector(VT, DL, V), Vec);
3561 }
3562 }
3563
3564 return Vec;
3565 }
3566
3567 return SDValue();
3568}
3569
3571 const RISCVSubtarget &Subtarget) {
3572 MVT VT = Op.getSimpleValueType();
3573 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3574
3575 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3576
3577 SDLoc DL(Op);
3578 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3579
3580 MVT XLenVT = Subtarget.getXLenVT();
3581 unsigned NumElts = Op.getNumOperands();
3582
3583 if (VT.getVectorElementType() == MVT::i1) {
3584 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3585 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3586 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3587 }
3588
3589 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3590 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3591 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3592 }
3593
3594 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3595 // scalar integer chunks whose bit-width depends on the number of mask
3596 // bits and XLEN.
3597 // First, determine the most appropriate scalar integer type to use. This
3598 // is at most XLenVT, but may be shrunk to a smaller vector element type
3599 // according to the size of the final vector - use i8 chunks rather than
3600 // XLenVT if we're producing a v8i1. This results in more consistent
3601 // codegen across RV32 and RV64.
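    // For example, a constant v16i1 mask is packed into a single i16 element
    // (a v1i16 build_vector) and bitcast back to v16i1; a constant v64i1 mask
    // on RV32 is built as a v2i32.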
3602 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3603 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3604 // If we have to use more than one INSERT_VECTOR_ELT then this
3605     // optimization is likely to increase code size; avoid performing it in
3606 // such a case. We can use a load from a constant pool in this case.
3607 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3608 return SDValue();
3609 // Now we can create our integer vector type. Note that it may be larger
3610 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3611 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3612 MVT IntegerViaVecVT =
3613 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3614 IntegerViaVecElts);
3615
3616 uint64_t Bits = 0;
3617 unsigned BitPos = 0, IntegerEltIdx = 0;
3618 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3619
3620 for (unsigned I = 0; I < NumElts;) {
3621 SDValue V = Op.getOperand(I);
3622 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3623 Bits |= ((uint64_t)BitValue << BitPos);
3624 ++BitPos;
3625 ++I;
3626
3627 // Once we accumulate enough bits to fill our scalar type or process the
3628 // last element, insert into our vector and clear our accumulated data.
3629 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3630 if (NumViaIntegerBits <= 32)
3631 Bits = SignExtend64<32>(Bits);
3632 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3633 Elts[IntegerEltIdx] = Elt;
3634 Bits = 0;
3635 BitPos = 0;
3636 IntegerEltIdx++;
3637 }
3638 }
3639
3640 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3641
3642 if (NumElts < NumViaIntegerBits) {
3643 // If we're producing a smaller vector than our minimum legal integer
3644 // type, bitcast to the equivalent (known-legal) mask type, and extract
3645 // our final mask.
3646 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3647 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3648 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3649 DAG.getConstant(0, DL, XLenVT));
3650 } else {
3651 // Else we must have produced an integer type with the same size as the
3652 // mask type; bitcast for the final result.
3653 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3654 Vec = DAG.getBitcast(VT, Vec);
3655 }
3656
3657 return Vec;
3658 }
3659
3660 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3661 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3663 if (!VT.isFloatingPoint())
3664 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3665 Splat =
3666 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3667 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3668 }
3669
3670 // Try and match index sequences, which we can lower to the vid instruction
3671 // with optional modifications. An all-undef vector is matched by
3672 // getSplatValue, above.
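  // For example, <0, 1, 2, 3> lowers to a plain vid.v, and <3, 5, 7, 9>
  // lowers to (vid.v << 1) + 3.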
3673 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3674 int64_t StepNumerator = SimpleVID->StepNumerator;
3675 unsigned StepDenominator = SimpleVID->StepDenominator;
3676 int64_t Addend = SimpleVID->Addend;
3677
3678 assert(StepNumerator != 0 && "Invalid step");
3679 bool Negate = false;
3680 int64_t SplatStepVal = StepNumerator;
3681 unsigned StepOpcode = ISD::MUL;
3682 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3683 // anyway as the shift of 63 won't fit in uimm5.
3684 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3685 isPowerOf2_64(std::abs(StepNumerator))) {
3686 Negate = StepNumerator < 0;
3687 StepOpcode = ISD::SHL;
3688 SplatStepVal = Log2_64(std::abs(StepNumerator));
3689 }
3690
3691     // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3692     // threshold since it's the immediate value many RVV instructions accept.
3693     // There is no vmul.vi instruction so ensure the multiply constant can fit
3694     // in a single addi instruction.
3695 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3696 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3697 isPowerOf2_32(StepDenominator) &&
3698 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3699 MVT VIDVT =
3701 MVT VIDContainerVT =
3702 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3703 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3704 // Convert right out of the scalable type so we can use standard ISD
3705 // nodes for the rest of the computation. If we used scalable types with
3706 // these, we'd lose the fixed-length vector info and generate worse
3707 // vsetvli code.
3708 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3709 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3710 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3711 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3712 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3713 }
3714 if (StepDenominator != 1) {
3715 SDValue SplatStep =
3716 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3717 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3718 }
3719 if (Addend != 0 || Negate) {
3720 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3721 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3722 VID);
3723 }
3724 if (VT.isFloatingPoint()) {
3725 // TODO: Use vfwcvt to reduce register pressure.
3726 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3727 }
3728 return VID;
3729 }
3730 }
3731
3732 // For very small build_vectors, use a single scalar insert of a constant.
3733 // TODO: Base this on constant rematerialization cost, not size.
3734 const unsigned EltBitSize = VT.getScalarSizeInBits();
3735 if (VT.getSizeInBits() <= 32 &&
3737 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3738 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3739 "Unexpected sequence type");
3740 // If we can use the original VL with the modified element type, this
3741 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3742 // be moved into InsertVSETVLI?
3743 unsigned ViaVecLen =
3744 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3745 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3746
3747 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3748 uint64_t SplatValue = 0;
3749 // Construct the amalgamated value at this larger vector type.
3750 for (const auto &OpIdx : enumerate(Op->op_values())) {
3751 const auto &SeqV = OpIdx.value();
3752 if (!SeqV.isUndef())
3753 SplatValue |=
3754 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3755 }
3756
3757 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3758     // achieve better constant materialization.
3759 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3760 SplatValue = SignExtend64<32>(SplatValue);
3761
3762 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3763 DAG.getUNDEF(ViaVecVT),
3764 DAG.getConstant(SplatValue, DL, XLenVT),
3765 DAG.getVectorIdxConstant(0, DL));
3766 if (ViaVecLen != 1)
3768 MVT::getVectorVT(ViaIntVT, 1), Vec,
3769 DAG.getConstant(0, DL, XLenVT));
3770 return DAG.getBitcast(VT, Vec);
3771 }
3772
3773
3774 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3775 // when re-interpreted as a vector with a larger element type. For example,
3776 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3777 // could be instead splat as
3778 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3779 // TODO: This optimization could also work on non-constant splats, but it
3780 // would require bit-manipulation instructions to construct the splat value.
3781 SmallVector<SDValue> Sequence;
3782 const auto *BV = cast<BuildVectorSDNode>(Op);
3783 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3785 BV->getRepeatedSequence(Sequence) &&
3786 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3787 unsigned SeqLen = Sequence.size();
3788 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3789 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3790 ViaIntVT == MVT::i64) &&
3791 "Unexpected sequence type");
3792
3793 // If we can use the original VL with the modified element type, this
3794 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3795 // be moved into InsertVSETVLI?
3796 const unsigned RequiredVL = NumElts / SeqLen;
3797 const unsigned ViaVecLen =
3798 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3799 NumElts : RequiredVL;
3800 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3801
3802 unsigned EltIdx = 0;
3803 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3804 uint64_t SplatValue = 0;
3805 // Construct the amalgamated value which can be splatted as this larger
3806 // vector type.
3807 for (const auto &SeqV : Sequence) {
3808 if (!SeqV.isUndef())
3809 SplatValue |=
3810 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3811 EltIdx++;
3812 }
3813
3814 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3815 // achieve better constant materialization.
3816 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3817 SplatValue = SignExtend64<32>(SplatValue);
3818
3819 // Since we can't introduce illegal i64 types at this stage, we can only
3820 // perform an i64 splat on RV32 if the value is the sign-extension of its own
3821 // low 32 bits. That way we can use RVV instructions to splat.
3822 assert((ViaIntVT.bitsLE(XLenVT) ||
3823 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3824 "Unexpected bitcast sequence");
3825 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3826 SDValue ViaVL =
3827 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3828 MVT ViaContainerVT =
3829 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3830 SDValue Splat =
3831 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3832 DAG.getUNDEF(ViaContainerVT),
3833 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3834 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3835 if (ViaVecLen != RequiredVL)
3836 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3837 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3838 DAG.getConstant(0, DL, XLenVT));
3839 return DAG.getBitcast(VT, Splat);
3840 }
3841 }
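// Example (illustrative only): the v4i16 pattern from the comment above is
// rebuilt here as a v2i32 splat of 0x00010000 via VMV_V_X_VL plus a bitcast
// back to v4i16. On RV32, an i64 repeated sequence is only splatted this way
// when the packed value passes the isInt<32>(SplatValue) guard, i.e. it is
// its own 32-bit sign extension.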
3842
3843 // If the number of signbits allows, see if we can lower as a <N x i8>.
3844 // Our main goal here is to reduce LMUL (and thus work) required to
3845 // build the constant, but we will also narrow if the resulting
3846 // narrow vector is known to materialize cheaply.
3847 // TODO: We really should be costing the smaller vector. There are
3848 // profitable cases this misses.
3849 if (EltBitSize > 8 && VT.isInteger() &&
3850 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3851 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3852 if (EltBitSize - SignBits < 8) {
3853 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3854 DL, Op->ops());
3855 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3856 Source, DAG, Subtarget);
3857 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3858 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3859 }
3860 }
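// Example (illustrative only): v4i16 = build_vector i16 -1, i16 0, i16 1,
// i16 -2 has at least 15 sign bits per element, so EltBitSize - SignBits < 8
// holds; the constant can be built as a v4i8 vector and widened back to v4i16
// with VSEXT_VL, roughly halving the LMUL needed to materialize it.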
3861
3862 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3863 return Res;
3864
3865 // For constant vectors, use generic constant pool lowering. Otherwise,
3866 // we'd have to materialize constants in GPRs just to move them into the
3867 // vector.
3868 return SDValue();
3869}
3870
3871 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3872 const RISCVSubtarget &Subtarget) {
3873 MVT VT = Op.getSimpleValueType();
3874 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3875
3876 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3877 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3878 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3879
3880 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3881
3882 SDLoc DL(Op);
3883 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3884
3885 MVT XLenVT = Subtarget.getXLenVT();
3886
3887 if (VT.getVectorElementType() == MVT::i1) {
3888 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3889 // vector type, we have a legal equivalently-sized i8 type, so we can use
3890 // that.
3891 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3892 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3893
3894 SDValue WideVec;
3895 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3896 // For a splat, perform a scalar truncate before creating the wider
3897 // vector.
3898 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3899 DAG.getConstant(1, DL, Splat.getValueType()));
3900 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3901 } else {
3902 SmallVector<SDValue, 8> Ops(Op->op_values());
3903 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3904 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3905 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3906 }
3907
3908 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3909 }
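// Example (illustrative only): v4i1 = build_vector i1 1, i1 0, i1 1, i1 1 is
// handled above by building the i8 vector {1, 0, 1, 1} (each element masked
// to its low bit) and emitting a setcc-ne against zero, which yields the v4i1
// mask directly.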
3910
3911 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3912 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3913 return Gather;
3914 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3915 : RISCVISD::VMV_V_X_VL;
3916 if (!VT.isFloatingPoint())
3917 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3918 Splat =
3919 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3920 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3921 }
3922
3923 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3924 return Res;
3925
3926 // If we're compiling for an exact VLEN value, we can split our work per
3927 // register in the register group.
3928 if (const auto VLen = Subtarget.getRealVLen();
3929 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3930 MVT ElemVT = VT.getVectorElementType();
3931 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3932 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3933 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3934 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3935 assert(M1VT == getLMUL1VT(M1VT));
3936
3937 // The following semantically builds up a fixed length concat_vector
3938 // of the component build_vectors. We eagerly lower to scalable and
3939 // insert_subvector here to avoid DAG combining it back to a large
3940 // build_vector.
3941 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3942 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3943 SDValue Vec = DAG.getUNDEF(ContainerVT);
3944 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3945 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3946 SDValue SubBV =
3947 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3948 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3949 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3950 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3951 DAG.getVectorIdxConstant(InsertIdx, DL));
3952 }
3953 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3954 }
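// Example (illustrative; concrete types assume VLEN = 128): a v8i64
// build_vector occupies four vector registers, so the loop above lowers four
// v2i64 build_vectors at LMUL=1 and stitches them together into the scalable
// container with INSERT_SUBVECTOR, rather than emitting one long
// vslide1down chain at LMUL=4.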
3955
3956 // For m1 vectors, if we have non-undef values in both halves of our vector,
3957 // split the vector into low and high halves, build them separately, then
3958 // use a vselect to combine them. For long vectors, this cuts the critical
3959 // path of the vslide1down sequence in half, and gives us an opportunity
3960 // to special-case each half independently. Note that we don't change the
3961 // length of the sub-vectors here, so if both halves fall back to the generic
3962 // vslide1down path, we should be able to fold the vselect into the final
3963 // vslidedown (for the undef tail) for the first half w/ masking.
3964 unsigned NumElts = VT.getVectorNumElements();
3965 unsigned NumUndefElts =
3966 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3967 unsigned NumDefElts = NumElts - NumUndefElts;
3968 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3969 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3970 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3971 SmallVector<SDValue> MaskVals;
3972 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3973 SubVecAOps.reserve(NumElts);
3974 SubVecBOps.reserve(NumElts);
3975 for (unsigned i = 0; i < NumElts; i++) {
3976 SDValue Elem = Op->getOperand(i);
3977 if (i < NumElts / 2) {
3978 SubVecAOps.push_back(Elem);
3979 SubVecBOps.push_back(UndefElem);
3980 } else {
3981 SubVecAOps.push_back(UndefElem);
3982 SubVecBOps.push_back(Elem);
3983 }
3984 bool SelectMaskVal = (i < NumElts / 2);
3985 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3986 }
3987 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3988 MaskVals.size() == NumElts);
3989
3990 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3991 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3992 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3993 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3994 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3995 }
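// Example (illustrative only): for a fully-defined v16i8 build_vector, the
// loop above produces SubVecA = {e0..e7, undef...}, SubVecB = {undef...,
// e8..e15} and a select mask that is true for the first eight lanes, so the
// vselect recombines two halves that were each built with a shorter
// vslide1down sequence.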
3996
3997 // Cap the cost at a value linear in the number of elements in the vector.
3998 // The default lowering is to use the stack; the vector store + scalar loads
3999 // sequence is linear in VL. However, at high LMULs, vslide1down and
4000 // vslidedown end up being (at least) linear in LMUL. As a result, using the
4001 // vslidedown lowering for every element ends up costing roughly VL*LMUL.
4002 // TODO: Should we be directly costing the stack alternative? Doing so might
4003 // give us a more accurate upper bound.
4004 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4005
4006 // TODO: unify with TTI getSlideCost.
4007 InstructionCost PerSlideCost = 1;
4008 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4009 default: break;
4010 case RISCVII::VLMUL::LMUL_2:
4011 PerSlideCost = 2;
4012 break;
4013 case RISCVII::VLMUL::LMUL_4:
4014 PerSlideCost = 4;
4015 break;
4016 case RISCVII::VLMUL::LMUL_8:
4017 PerSlideCost = 8;
4018 break;
4019 }
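// Worked example (illustrative only): a fully-defined 8-element vector whose
// container is LMUL_2 starts with LinearBudget = 16 and pays PerSlideCost = 2
// per element, leaving 16 - 8*2 = 0, so the slide-based lowering is kept. If
// the same element count needed LMUL_4, the budget would go negative and the
// code below would return SDValue(), deferring to the default expansion.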
4020
4021 // TODO: Should we be using the build instseq then cost + evaluate scheme
4022 // we use for integer constants here?
4023 unsigned UndefCount = 0;
4024 for (const SDValue &V : Op->ops()) {
4025 if (V.isUndef()) {
4026 UndefCount++;
4027 continue;
4028 }
4029 if (UndefCount) {
4030 LinearBudget -= PerSlideCost;
4031 UndefCount = 0;
4032 }
4033 LinearBudget -= PerSlideCost;
4034 }
4035 if (UndefCount) {
4036 LinearBudget -= PerSlideCost;
4037 }
4038
4039 if (LinearBudget < 0)
4040 return SDValue();
4041
4042 assert((!VT.isFloatingPoint() ||
4043 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4044 "Illegal type which will result in reserved encoding");
4045
4046 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4047
4048 SDValue Vec;
4049 UndefCount = 0;
4050 for (SDValue V : Op->ops()) {
4051 if (V.isUndef()) {
4052 UndefCount++;
4053 continue;
4054 }
4055
4056 // Start our sequence with a TA splat in the hopes that hardware is able to
4057 // recognize there's no dependency on the prior value of our temporary
4058 // register.
4059 if (!Vec) {
4060 Vec = DAG.getSplatVector(VT, DL, V);
4061 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4062 UndefCount = 0;
4063 continue;
4064 }
4065
4066 if (UndefCount) {
4067 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4068 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4069 Vec, Offset, Mask, VL, Policy);
4070 UndefCount = 0;
4071 }
4072 auto OpCode =
4073 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4074 if (!VT.isFloatingPoint())
4075 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4076 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4077 V, Mask, VL);
4078 }
4079 if (UndefCount) {
4080 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4081 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4082 Vec, Offset, Mask, VL, Policy);
4083 }
4084 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4085}
4086
4087 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4088 SDValue Lo, SDValue Hi, SDValue VL,
4089 SelectionDAG &DAG) {
4090 if (!Passthru)
4091 Passthru = DAG.getUNDEF(VT);
4092 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4093 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4094 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4095 // If the Hi constant is just Lo's sign-extension (every bit equals Lo's sign
4096 // bit), lower this as a custom node to try and match RVV vector/scalar instructions.
4097 if ((LoC >> 31) == HiC)
4098 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4099
4100 // If VL is equal to VLMAX, or is a constant that fits in 4 bits, and the Hi
4101 // constant is equal to Lo, we can use vmv.v.x with EEW = 32 to lower it. This
4102 // lets us use a vlmax vsetvli or a vsetivli to change the VL.
4103 // FIXME: Support larger constants?
4104 // FIXME: Support non-constant VLs by saturating?
4105 if (LoC == HiC) {
4106 SDValue NewVL;
4107 if (isAllOnesConstant(VL) ||
4108 (isa<RegisterSDNode>(VL) &&
4109 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4110 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4111 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4112 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4113
4114 if (NewVL) {
4115 MVT InterVT =
4116 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4117 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4118 DAG.getUNDEF(InterVT), Lo, NewVL);
4119 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4120 }
4121 }
4122 }
4123
4124 // Detect cases where Hi is (SRA Lo, 31), which means Hi is the sign-extension of Lo.
4125 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4126 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4127 Hi.getConstantOperandVal(1) == 31)
4128 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4129
4130 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4131 // even if it might be sign extended.
4132 if (Hi.isUndef())
4133 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4134
4135 // Fall back to a stack store and stride x0 vector load.
4136 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4137 Hi, VL);
4138}
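// Example (illustrative only): splatting the constant i64 0x0000000500000005
// (Lo == Hi == 5) into an i64 vector with a constant VL of 4 takes the
// EEW=32 path above: it emits VMV_V_X_VL of 5 into an i32 vector with twice
// the element count and the VL doubled to 8, then bitcasts back, avoiding the
// stack round-trip of SPLAT_VECTOR_SPLIT_I64_VL.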
4139
4140// Called by type legalization to handle splat of i64 on RV32.
4141// FIXME: We can optimize this when the type has sign or zero bits in one
4142// of the halves.
4143static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4144 SDValue Scalar, SDValue VL,
4145 SelectionDAG &DAG) {
4146 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4147 SDValue Lo, Hi;
4148 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4149 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4150}
4151
4152 // This function lowers a splat of the scalar operand Scalar with the vector
4153 // length VL. It ensures the final sequence is type legal, which is useful when
4154// lowering a splat after type legalization.
4155static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4156 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4157 const RISCVSubtarget &Subtarget) {
4158 bool HasPassthru = Passthru && !Passthru.isUndef();
4159 if (!HasPassthru && !Passthru)
4160 Passthru = DAG.getUNDEF(VT);
4161 if (VT.isFloatingPoint())
4162 return DAG.getNode(RISCVISD::VFMV_V_F_VL,