1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/Support/Debug.h"
46#include <optional>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "riscv-lower"
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
59
60static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
65
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
71
72static cl::opt<int>
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
77
78static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
81
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
85
86 if (Subtarget.isRVE())
87 report_fatal_error("Codegen not yet implemented for RVE");
88
89 RISCVABI::ABI ABI = Subtarget.getTargetABI();
90 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
91
92 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
93 !Subtarget.hasStdExtF()) {
94 errs() << "Hard-float 'f' ABI can't be used for a target that "
95 "doesn't support the F instruction set extension (ignoring "
96 "target-abi)\n";
98 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
99 !Subtarget.hasStdExtD()) {
100 errs() << "Hard-float 'd' ABI can't be used for a target that "
101 "doesn't support the D instruction set extension (ignoring "
102 "target-abi)\n";
103 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
104 }
105
106 switch (ABI) {
107 default:
108 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhOrZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxOrZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267
269
271
272 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
273 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
274
275 if (Subtarget.is64Bit()) {
277
278 if (!RV64LegalI32) {
281 MVT::i32, Custom);
284 MVT::i32, Custom);
285 }
286 } else {
288 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
289 nullptr);
290 setLibcallName(RTLIB::MULO_I64, nullptr);
291 }
292
293 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
295 if (RV64LegalI32 && Subtarget.is64Bit())
297 } else if (Subtarget.is64Bit()) {
299 if (!RV64LegalI32)
301 } else {
303 }
304
305 if (!Subtarget.hasStdExtM()) {
307 XLenVT, Expand);
308 if (RV64LegalI32 && Subtarget.is64Bit())
310 Promote);
311 } else if (Subtarget.is64Bit()) {
312 if (!RV64LegalI32)
314 {MVT::i8, MVT::i16, MVT::i32}, Custom);
315 }
316
317 if (RV64LegalI32 && Subtarget.is64Bit()) {
321 Expand);
322 }
323
326 Expand);
327
329 Custom);
330
331 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
332 if (!RV64LegalI32 && Subtarget.is64Bit())
334 } else if (Subtarget.hasVendorXTHeadBb()) {
335 if (Subtarget.is64Bit())
338 } else {
340 if (RV64LegalI32 && Subtarget.is64Bit())
342 }
343
344 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
345 // pattern match it directly in isel.
347 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
348 Subtarget.hasVendorXTHeadBb())
349 ? Legal
350 : Expand);
351 if (RV64LegalI32 && Subtarget.is64Bit())
353 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
354 Subtarget.hasVendorXTHeadBb())
355 ? Promote
356 : Expand);
357
358 // Zbkb can use rev8+brev8 to implement bitreverse.
360 Subtarget.hasStdExtZbkb() ? Custom : Expand);
361
362 if (Subtarget.hasStdExtZbb()) {
364 Legal);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 Promote);
368
369 if (Subtarget.is64Bit()) {
370 if (RV64LegalI32)
372 else
374 }
375 } else {
377 if (RV64LegalI32 && Subtarget.is64Bit())
379 }
380
381 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
382 // We need the custom lowering to make sure that the resulting sequence
383 // for the 32bit case is efficient on 64bit targets.
384 if (Subtarget.is64Bit()) {
385 if (RV64LegalI32) {
387 Subtarget.hasStdExtZbb() ? Legal : Promote);
388 if (!Subtarget.hasStdExtZbb())
390 } else
392 }
393 } else {
395 if (RV64LegalI32 && Subtarget.is64Bit())
397 }
398
399 if (!RV64LegalI32 && Subtarget.is64Bit() &&
400 !Subtarget.hasShortForwardBranchOpt())
402
403 // We can use PseudoCCSUB to implement ABS.
404 if (Subtarget.hasShortForwardBranchOpt())
406
407 if (!Subtarget.hasVendorXTHeadCondMov())
409
410 if (RV64LegalI32 && Subtarget.is64Bit())
412
413 static const unsigned FPLegalNodeTypes[] = {
420
421 static const ISD::CondCode FPCCToExpand[] = {
425
426 static const unsigned FPOpToExpand[] = {
428 ISD::FREM};
429
430 static const unsigned FPRndMode[] = {
433
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
457 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
459 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
460 // DAGCombiner::visitFP_ROUND probably needs improvements first.
462 }
463
465 if (Subtarget.hasStdExtZfhOrZhinx()) {
466 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
467 setOperationAction(FPRndMode, MVT::f16,
468 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 } else {
472 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
475 MVT::f16, Legal);
476 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
477 // DAGCombiner::visitFP_ROUND probably needs improvements first.
479 }
480
483 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
486
488 Subtarget.hasStdExtZfa() ? Legal : Promote);
493 MVT::f16, Promote);
494
495 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
496 // complete support for all operations in LegalizeDAG.
501 MVT::f16, Promote);
502
503 // We need to custom promote this.
504 if (Subtarget.is64Bit())
506
507 if (!Subtarget.hasStdExtZfa())
509 }
510
511 if (Subtarget.hasStdExtFOrZfinx()) {
512 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
513 setOperationAction(FPRndMode, MVT::f32,
514 Subtarget.hasStdExtZfa() ? Legal : Custom);
515 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
519 setOperationAction(FPOpToExpand, MVT::f32, Expand);
520 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
521 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
522 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
523 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
527 Subtarget.isSoftFPABI() ? LibCall : Custom);
530
531 if (Subtarget.hasStdExtZfa())
533 else
535 }
536
537 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
539
540 if (Subtarget.hasStdExtDOrZdinx()) {
541 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
542
543 if (Subtarget.hasStdExtZfa()) {
544 setOperationAction(FPRndMode, MVT::f64, Legal);
548 } else {
549 if (Subtarget.is64Bit())
550 setOperationAction(FPRndMode, MVT::f64, Custom);
551
553 }
554
557 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
562 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
563 setOperationAction(FPOpToExpand, MVT::f64, Expand);
564 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
565 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
566 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
567 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
571 Subtarget.isSoftFPABI() ? LibCall : Custom);
574 }
575
576 if (Subtarget.is64Bit()) {
579 MVT::i32, Custom);
581 }
582
583 if (Subtarget.hasStdExtFOrZfinx()) {
585 Custom);
586
589 XLenVT, Legal);
590
591 if (RV64LegalI32 && Subtarget.is64Bit())
594 MVT::i32, Legal);
595
598 }
599
602 XLenVT, Custom);
603
605
606 if (Subtarget.is64Bit())
608
609 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
610 // Unfortunately this can't be determined just from the ISA naming string.
612 Subtarget.is64Bit() ? Legal : Custom);
613
616 if (Subtarget.is64Bit())
618
619 if (Subtarget.hasStdExtZicbop()) {
621 }
622
623 if (Subtarget.hasStdExtA()) {
626 } else if (Subtarget.hasForcedAtomics()) {
628 } else {
630 }
631
633
635
636 if (Subtarget.hasVInstructions()) {
638
640 if (RV64LegalI32 && Subtarget.is64Bit())
642
643 // RVV intrinsics may have illegal operands.
644 // We also need to custom legalize vmv.x.s.
647 {MVT::i8, MVT::i16}, Custom);
648 if (Subtarget.is64Bit())
650 MVT::i32, Custom);
651 else
653 MVT::i64, Custom);
654
656 MVT::Other, Custom);
657
658 static const unsigned IntegerVPOps[] = {
659 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
660 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
661 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
662 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
663 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
664 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
665 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
666 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
667 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
668 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
669 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
670 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
671
672 static const unsigned FloatingPointVPOps[] = {
673 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
674 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
675 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
676 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
677 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
678 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
679 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
680 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
681 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
682 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
683 ISD::EXPERIMENTAL_VP_REVERSE};
684
685 static const unsigned IntegerVecReduceOps[] = {
689
690 static const unsigned FloatingPointVecReduceOps[] = {
693
694 if (!Subtarget.is64Bit()) {
695 // We must custom-lower certain vXi64 operations on RV32 due to the vector
696 // element type being illegal.
698 MVT::i64, Custom);
699
700 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
701
702 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
703 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
704 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
705 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
706 MVT::i64, Custom);
707 }
708
709 for (MVT VT : BoolVecVTs) {
710 if (!isTypeLegal(VT))
711 continue;
712
714
715 // Mask VTs are custom-expanded into a series of standard nodes
719 VT, Custom);
720
722 Custom);
723
726 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
727 Expand);
728
729 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
730
733 Custom);
734
736 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
737 Custom);
738
739 // RVV has native int->float & float->int conversions where the
740 // element type sizes are within one power-of-two of each other. Any
741 // wider distances between type sizes have to be lowered as sequences
742 // which progressively narrow the gap in stages.
747 VT, Custom);
749 Custom);
750
751 // Expand all extending loads to types larger than this, and truncating
752 // stores from types larger than this.
754 setTruncStoreAction(VT, OtherVT, Expand);
756 OtherVT, Expand);
757 }
758
759 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
760 ISD::VP_TRUNCATE, ISD::VP_SETCC},
761 VT, Custom);
762
765
767
768 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
769
772 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
773 }
774
775 for (MVT VT : IntVecVTs) {
776 if (!isTypeLegal(VT))
777 continue;
778
781
782 // Vectors implement MULHS/MULHU.
784
785 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
786 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
788
790 Legal);
791
792 // Custom-lower extensions and truncations from/to mask types.
794 VT, Custom);
795
796 // RVV has native int->float & float->int conversions where the
797 // element type sizes are within one power-of-two of each other. Any
798 // wider distances between type sizes have to be lowered as sequences
799 // which progressively narrow the gap in stages.
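// For example, an i8 <-> f64 conversion spans more than one power-of-two of
// element width, so it is lowered as a staged sequence through intermediate
// element types rather than as a single vfcvt/vfncvt.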
804 VT, Custom);
806 Custom);
810
811 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
812 // nodes which truncate by one power of two at a time.
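// For example, truncating nxv2i64 to nxv2i8 becomes a chain of these nodes,
// halving the element width at each step (i64 -> i32 -> i16 -> i8).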
814
815 // Custom-lower insert/extract operations to simplify patterns.
817 Custom);
818
819 // Custom-lower reduction operations to set up the corresponding custom
820 // nodes' operands.
821 setOperationAction(IntegerVecReduceOps, VT, Custom);
822
823 setOperationAction(IntegerVPOps, VT, Custom);
824
826
828 VT, Custom);
829
831 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
832 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
833 VT, Custom);
834
837 VT, Custom);
838
841
843
845 setTruncStoreAction(VT, OtherVT, Expand);
847 OtherVT, Expand);
848 }
849
852
853 // Splice
855
856 if (Subtarget.hasStdExtZvkb()) {
858 setOperationAction(ISD::VP_BSWAP, VT, Custom);
859 } else {
860 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
862 }
863
864 if (Subtarget.hasStdExtZvbb()) {
866 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
867 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
868 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
869 VT, Custom);
870 } else {
871 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
873 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
874 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
875 VT, Expand);
876
877 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
878 // the range of f32.
879 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
880 if (isTypeLegal(FloatVT)) {
882 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
883 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
884 VT, Custom);
885 }
886 }
887 }
888
889 // Expand various CCs to best match the RVV ISA, which natively supports UNE
890 // but no other unordered comparisons, and supports all ordered comparisons
891 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
892 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
893 // and we pattern-match those back to the "original", swapping operands once
894 // more. This way we catch both operations and both "vf" and "fv" forms with
895 // fewer patterns.
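// For example, (setcc ogt x, y) is expanded to (setcc olt y, x), which isel
// then matches back with the operands swapped once more, so one set of
// patterns covers both the "vf" and "fv" instruction forms.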
896 static const ISD::CondCode VFPCCToExpand[] = {
900 };
901
902 // TODO: support more ops.
903 static const unsigned ZvfhminPromoteOps[] = {
911
912 // TODO: support more vp ops.
913 static const unsigned ZvfhminPromoteVPOps[] = {
914 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
915 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
916 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
917 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
918 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
919 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
920 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
921 ISD::VP_FNEARBYINT, ISD::VP_SETCC};
922
923 // Sets common operation actions on RVV floating-point vector types.
924 const auto SetCommonVFPActions = [&](MVT VT) {
926 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
927 // sizes are within one power-of-two of each other. Therefore conversions
928 // between vXf16 and vXf64 must be lowered as sequences which convert via
929 // vXf32.
931 // Custom-lower insert/extract operations to simplify patterns.
933 Custom);
934 // Expand various condition codes (explained above).
935 setCondCodeAction(VFPCCToExpand, VT, Expand);
936
939
943 VT, Custom);
944
945 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
946
947 // Expand FP operations that need libcalls.
959
961
963
965 VT, Custom);
966
968 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
969 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
970 VT, Custom);
971
974
977 VT, Custom);
978
981
983
984 setOperationAction(FloatingPointVPOps, VT, Custom);
985
987 Custom);
990 VT, Legal);
995 VT, Custom);
996 };
997
998 // Sets common extload/truncstore actions on RVV floating-point vector
999 // types.
1000 const auto SetCommonVFPExtLoadTruncStoreActions =
1001 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1002 for (auto SmallVT : SmallerVTs) {
1003 setTruncStoreAction(VT, SmallVT, Expand);
1004 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1005 }
1006 };
1007
1008 if (Subtarget.hasVInstructionsF16()) {
1009 for (MVT VT : F16VecVTs) {
1010 if (!isTypeLegal(VT))
1011 continue;
1012 SetCommonVFPActions(VT);
1013 }
1014 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1015 for (MVT VT : F16VecVTs) {
1016 if (!isTypeLegal(VT))
1017 continue;
1020 Custom);
1021 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1022 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1023 Custom);
1026 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1027 VT, Custom);
1030 VT, Custom);
1032 // load/store
1034
1035 // Custom split nxv32f16 since nxv32f32 is not legal.
1036 if (VT == MVT::nxv32f16) {
1037 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1038 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1039 continue;
1040 }
1041 // Add more promote ops.
1042 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1043 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1044 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1045 }
1046 }
1047
1048 if (Subtarget.hasVInstructionsF32()) {
1049 for (MVT VT : F32VecVTs) {
1050 if (!isTypeLegal(VT))
1051 continue;
1052 SetCommonVFPActions(VT);
1053 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1054 }
1055 }
1056
1057 if (Subtarget.hasVInstructionsF64()) {
1058 for (MVT VT : F64VecVTs) {
1059 if (!isTypeLegal(VT))
1060 continue;
1061 SetCommonVFPActions(VT);
1062 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1063 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1064 }
1065 }
1066
1067 if (Subtarget.useRVVForFixedLengthVectors()) {
1069 if (!useRVVForFixedLengthVectorVT(VT))
1070 continue;
1071
1072 // By default everything must be expanded.
1073 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1076 setTruncStoreAction(VT, OtherVT, Expand);
1078 OtherVT, Expand);
1079 }
1080
1081 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1082 // expansion to a build_vector of 0s.
1084
1085 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1087 Custom);
1088
1090 Custom);
1091
1093 VT, Custom);
1094
1096
1098
1100
1102
1104
1106
1109 Custom);
1110
1112 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1113 Custom);
1114
1116 {
1125 },
1126 VT, Custom);
1128 Custom);
1129
1131
1132 // Operations below are different between masks and other vectors.
1133 if (VT.getVectorElementType() == MVT::i1) {
1134 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1135 ISD::OR, ISD::XOR},
1136 VT, Custom);
1137
1138 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1139 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1140 VT, Custom);
1141
1142 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1143 continue;
1144 }
1145
1146 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1147 // it before type legalization for i64 vectors on RV32. It will then be
1148 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1149 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1150 // improvements first.
1151 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1154 }
1155
1158
1159 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1160 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1161 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1162 ISD::VP_SCATTER},
1163 VT, Custom);
1164
1168 VT, Custom);
1169
1172
1173 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1174 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1176
1179 Custom);
1180
1183
1186
1187 // Custom-lower reduction operations to set up the corresponding custom
1188 // nodes' operands.
1192 VT, Custom);
1193
1194 setOperationAction(IntegerVPOps, VT, Custom);
1195
1196 if (Subtarget.hasStdExtZvkb())
1198
1199 if (Subtarget.hasStdExtZvbb()) {
1202 VT, Custom);
1203 } else {
1204 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
1205 // the range of f32.
1206 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1207 if (isTypeLegal(FloatVT))
1210 Custom);
1211 }
1212 }
1213
1215 // There are no extending loads or truncating stores.
1216 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1217 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1218 setTruncStoreAction(VT, InnerVT, Expand);
1219 }
1220
1221 if (!useRVVForFixedLengthVectorVT(VT))
1222 continue;
1223
1224 // By default everything must be expanded.
1225 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1227
1228 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1229 // expansion to a build_vector of 0s.
1231
1232 if (VT.getVectorElementType() == MVT::f16 &&
1233 !Subtarget.hasVInstructionsF16()) {
1236 Custom);
1237 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1239 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1240 Custom);
1242 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1243 VT, Custom);
1246 VT, Custom);
1249 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1250 // Don't promote f16 vector operations to f32 if f32 vector type is
1251 // not legal.
1252 // TODO: could split the f16 vector into two vectors and do promotion.
1253 if (!isTypeLegal(F32VecVT))
1254 continue;
1255 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1256 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1257 continue;
1258 }
1259
1260 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1262 Custom);
1263
1267 VT, Custom);
1268
1271 VT, Custom);
1272
1273 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1274 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1275 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1276 ISD::VP_SCATTER},
1277 VT, Custom);
1278
1283 VT, Custom);
1284
1286
1289 VT, Custom);
1290
1291 setCondCodeAction(VFPCCToExpand, VT, Expand);
1292
1296
1298
1299 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1300
1301 setOperationAction(FloatingPointVPOps, VT, Custom);
1302
1304 Custom);
1311 VT, Custom);
1312 }
1313
1314 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1315 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1316 Custom);
1319 if (Subtarget.hasStdExtFOrZfinx())
1321 if (Subtarget.hasStdExtDOrZdinx())
1323 }
1324 }
1325
1326 if (Subtarget.hasStdExtA()) {
1328 if (RV64LegalI32 && Subtarget.is64Bit())
1330 }
1331
1332 if (Subtarget.hasForcedAtomics()) {
1333 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1339 XLenVT, LibCall);
1340 }
1341
1342 if (Subtarget.hasVendorXTHeadMemIdx()) {
1343 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1344 ++im) {
1345 setIndexedLoadAction(im, MVT::i8, Legal);
1346 setIndexedStoreAction(im, MVT::i8, Legal);
1347 setIndexedLoadAction(im, MVT::i16, Legal);
1348 setIndexedStoreAction(im, MVT::i16, Legal);
1349 setIndexedLoadAction(im, MVT::i32, Legal);
1350 setIndexedStoreAction(im, MVT::i32, Legal);
1351
1352 if (Subtarget.is64Bit()) {
1353 setIndexedLoadAction(im, MVT::i64, Legal);
1354 setIndexedStoreAction(im, MVT::i64, Legal);
1355 }
1356 }
1357 }
1358
1359 // Function alignments.
1360 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1361 setMinFunctionAlignment(FunctionAlignment);
1362 // Set preferred alignments.
1365
1366 // Jumps are expensive, compared to logic
1368
1372 if (Subtarget.is64Bit())
1374
1375 if (Subtarget.hasStdExtFOrZfinx())
1377
1378 if (Subtarget.hasStdExtZbb())
1380
1381 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1383
1384 if (Subtarget.hasStdExtZbkb())
1388 if (Subtarget.hasStdExtFOrZfinx())
1391 if (Subtarget.hasVInstructions())
1393 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1396 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1398 if (Subtarget.hasVendorXTHeadMemPair())
1400 if (Subtarget.useRVVForFixedLengthVectors())
1402
1403 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1404 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1405
1406 // Disable strict node mutation.
1407 IsStrictFPEnabled = true;
1408}
1409
1411 LLVMContext &Context,
1412 EVT VT) const {
1413 if (!VT.isVector())
1414 return getPointerTy(DL);
1415 if (Subtarget.hasVInstructions() &&
1416 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1417 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1419}
1420
1421MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1422 return Subtarget.getXLenVT();
1423}
1424
1425// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1426bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1427 unsigned VF,
1428 bool IsScalable) const {
1429 if (!Subtarget.hasVInstructions())
1430 return true;
1431
1432 if (!IsScalable)
1433 return true;
1434
1435 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1436 return true;
1437
1438 // Don't allow VF=1 if those types aren't legal.
1439 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1440 return true;
1441
1442 // VLEN=32 support is incomplete.
1443 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1444 return true;
1445
1446 // The maximum VF is for the smallest element width with LMUL=8.
1447 // VF must be a power of 2.
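// With RVVBitsPerBlock = 64, this gives MaxVF = (64 / 8) * 8 = 64.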
1448 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1449 return VF > MaxVF || !isPowerOf2_32(VF);
1450}
1451
1453 const CallInst &I,
1454 MachineFunction &MF,
1455 unsigned Intrinsic) const {
1456 auto &DL = I.getModule()->getDataLayout();
1457
1458 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1459 bool IsUnitStrided) {
1461 Info.ptrVal = I.getArgOperand(PtrOp);
1462 Type *MemTy;
1463 if (IsStore) {
1464 // Store value is the first operand.
1465 MemTy = I.getArgOperand(0)->getType();
1466 } else {
1467 // Use the return type. If it's a segment load, the return type is a struct.
1468 MemTy = I.getType();
1469 if (MemTy->isStructTy())
1470 MemTy = MemTy->getStructElementType(0);
1471 }
1472 if (!IsUnitStrided)
1473 MemTy = MemTy->getScalarType();
1474
1475 Info.memVT = getValueType(DL, MemTy);
1476 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1478 Info.flags |=
1480 return true;
1481 };
1482
1483 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1485
1487 switch (Intrinsic) {
1488 default:
1489 return false;
1490 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1491 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1492 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1493 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1494 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1495 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1496 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1497 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1498 case Intrinsic::riscv_masked_cmpxchg_i32:
1500 Info.memVT = MVT::i32;
1501 Info.ptrVal = I.getArgOperand(0);
1502 Info.offset = 0;
1503 Info.align = Align(4);
1506 return true;
1507 case Intrinsic::riscv_masked_strided_load:
1508 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1509 /*IsUnitStrided*/ false);
1510 case Intrinsic::riscv_masked_strided_store:
1511 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1512 /*IsUnitStrided*/ false);
1513 case Intrinsic::riscv_seg2_load:
1514 case Intrinsic::riscv_seg3_load:
1515 case Intrinsic::riscv_seg4_load:
1516 case Intrinsic::riscv_seg5_load:
1517 case Intrinsic::riscv_seg6_load:
1518 case Intrinsic::riscv_seg7_load:
1519 case Intrinsic::riscv_seg8_load:
1520 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1521 /*IsUnitStrided*/ false);
1522 case Intrinsic::riscv_seg2_store:
1523 case Intrinsic::riscv_seg3_store:
1524 case Intrinsic::riscv_seg4_store:
1525 case Intrinsic::riscv_seg5_store:
1526 case Intrinsic::riscv_seg6_store:
1527 case Intrinsic::riscv_seg7_store:
1528 case Intrinsic::riscv_seg8_store:
1529 // Operands are (vec, ..., vec, ptr, vl)
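// e.g. llvm.riscv.seg2.store(v0, v1, ptr, vl): the pointer is operand
// I.arg_size() - 2.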
1530 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1531 /*IsStore*/ true,
1532 /*IsUnitStrided*/ false);
1533 case Intrinsic::riscv_vle:
1534 case Intrinsic::riscv_vle_mask:
1535 case Intrinsic::riscv_vleff:
1536 case Intrinsic::riscv_vleff_mask:
1537 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1538 /*IsStore*/ false,
1539 /*IsUnitStrided*/ true);
1540 case Intrinsic::riscv_vse:
1541 case Intrinsic::riscv_vse_mask:
1542 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1543 /*IsStore*/ true,
1544 /*IsUnitStrided*/ true);
1545 case Intrinsic::riscv_vlse:
1546 case Intrinsic::riscv_vlse_mask:
1547 case Intrinsic::riscv_vloxei:
1548 case Intrinsic::riscv_vloxei_mask:
1549 case Intrinsic::riscv_vluxei:
1550 case Intrinsic::riscv_vluxei_mask:
1551 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1552 /*IsStore*/ false,
1553 /*IsUnitStrided*/ false);
1554 case Intrinsic::riscv_vsse:
1555 case Intrinsic::riscv_vsse_mask:
1556 case Intrinsic::riscv_vsoxei:
1557 case Intrinsic::riscv_vsoxei_mask:
1558 case Intrinsic::riscv_vsuxei:
1559 case Intrinsic::riscv_vsuxei_mask:
1560 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1561 /*IsStore*/ true,
1562 /*IsUnitStrided*/ false);
1563 case Intrinsic::riscv_vlseg2:
1564 case Intrinsic::riscv_vlseg3:
1565 case Intrinsic::riscv_vlseg4:
1566 case Intrinsic::riscv_vlseg5:
1567 case Intrinsic::riscv_vlseg6:
1568 case Intrinsic::riscv_vlseg7:
1569 case Intrinsic::riscv_vlseg8:
1570 case Intrinsic::riscv_vlseg2ff:
1571 case Intrinsic::riscv_vlseg3ff:
1572 case Intrinsic::riscv_vlseg4ff:
1573 case Intrinsic::riscv_vlseg5ff:
1574 case Intrinsic::riscv_vlseg6ff:
1575 case Intrinsic::riscv_vlseg7ff:
1576 case Intrinsic::riscv_vlseg8ff:
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ false,
1579 /*IsUnitStrided*/ false);
1580 case Intrinsic::riscv_vlseg2_mask:
1581 case Intrinsic::riscv_vlseg3_mask:
1582 case Intrinsic::riscv_vlseg4_mask:
1583 case Intrinsic::riscv_vlseg5_mask:
1584 case Intrinsic::riscv_vlseg6_mask:
1585 case Intrinsic::riscv_vlseg7_mask:
1586 case Intrinsic::riscv_vlseg8_mask:
1587 case Intrinsic::riscv_vlseg2ff_mask:
1588 case Intrinsic::riscv_vlseg3ff_mask:
1589 case Intrinsic::riscv_vlseg4ff_mask:
1590 case Intrinsic::riscv_vlseg5ff_mask:
1591 case Intrinsic::riscv_vlseg6ff_mask:
1592 case Intrinsic::riscv_vlseg7ff_mask:
1593 case Intrinsic::riscv_vlseg8ff_mask:
1594 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1595 /*IsStore*/ false,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_vlsseg2:
1598 case Intrinsic::riscv_vlsseg3:
1599 case Intrinsic::riscv_vlsseg4:
1600 case Intrinsic::riscv_vlsseg5:
1601 case Intrinsic::riscv_vlsseg6:
1602 case Intrinsic::riscv_vlsseg7:
1603 case Intrinsic::riscv_vlsseg8:
1604 case Intrinsic::riscv_vloxseg2:
1605 case Intrinsic::riscv_vloxseg3:
1606 case Intrinsic::riscv_vloxseg4:
1607 case Intrinsic::riscv_vloxseg5:
1608 case Intrinsic::riscv_vloxseg6:
1609 case Intrinsic::riscv_vloxseg7:
1610 case Intrinsic::riscv_vloxseg8:
1611 case Intrinsic::riscv_vluxseg2:
1612 case Intrinsic::riscv_vluxseg3:
1613 case Intrinsic::riscv_vluxseg4:
1614 case Intrinsic::riscv_vluxseg5:
1615 case Intrinsic::riscv_vluxseg6:
1616 case Intrinsic::riscv_vluxseg7:
1617 case Intrinsic::riscv_vluxseg8:
1618 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1619 /*IsStore*/ false,
1620 /*IsUnitStrided*/ false);
1621 case Intrinsic::riscv_vlsseg2_mask:
1622 case Intrinsic::riscv_vlsseg3_mask:
1623 case Intrinsic::riscv_vlsseg4_mask:
1624 case Intrinsic::riscv_vlsseg5_mask:
1625 case Intrinsic::riscv_vlsseg6_mask:
1626 case Intrinsic::riscv_vlsseg7_mask:
1627 case Intrinsic::riscv_vlsseg8_mask:
1628 case Intrinsic::riscv_vloxseg2_mask:
1629 case Intrinsic::riscv_vloxseg3_mask:
1630 case Intrinsic::riscv_vloxseg4_mask:
1631 case Intrinsic::riscv_vloxseg5_mask:
1632 case Intrinsic::riscv_vloxseg6_mask:
1633 case Intrinsic::riscv_vloxseg7_mask:
1634 case Intrinsic::riscv_vloxseg8_mask:
1635 case Intrinsic::riscv_vluxseg2_mask:
1636 case Intrinsic::riscv_vluxseg3_mask:
1637 case Intrinsic::riscv_vluxseg4_mask:
1638 case Intrinsic::riscv_vluxseg5_mask:
1639 case Intrinsic::riscv_vluxseg6_mask:
1640 case Intrinsic::riscv_vluxseg7_mask:
1641 case Intrinsic::riscv_vluxseg8_mask:
1642 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1643 /*IsStore*/ false,
1644 /*IsUnitStrided*/ false);
1645 case Intrinsic::riscv_vsseg2:
1646 case Intrinsic::riscv_vsseg3:
1647 case Intrinsic::riscv_vsseg4:
1648 case Intrinsic::riscv_vsseg5:
1649 case Intrinsic::riscv_vsseg6:
1650 case Intrinsic::riscv_vsseg7:
1651 case Intrinsic::riscv_vsseg8:
1652 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1653 /*IsStore*/ true,
1654 /*IsUnitStrided*/ false);
1655 case Intrinsic::riscv_vsseg2_mask:
1656 case Intrinsic::riscv_vsseg3_mask:
1657 case Intrinsic::riscv_vsseg4_mask:
1658 case Intrinsic::riscv_vsseg5_mask:
1659 case Intrinsic::riscv_vsseg6_mask:
1660 case Intrinsic::riscv_vsseg7_mask:
1661 case Intrinsic::riscv_vsseg8_mask:
1662 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1663 /*IsStore*/ true,
1664 /*IsUnitStrided*/ false);
1665 case Intrinsic::riscv_vssseg2:
1666 case Intrinsic::riscv_vssseg3:
1667 case Intrinsic::riscv_vssseg4:
1668 case Intrinsic::riscv_vssseg5:
1669 case Intrinsic::riscv_vssseg6:
1670 case Intrinsic::riscv_vssseg7:
1671 case Intrinsic::riscv_vssseg8:
1672 case Intrinsic::riscv_vsoxseg2:
1673 case Intrinsic::riscv_vsoxseg3:
1674 case Intrinsic::riscv_vsoxseg4:
1675 case Intrinsic::riscv_vsoxseg5:
1676 case Intrinsic::riscv_vsoxseg6:
1677 case Intrinsic::riscv_vsoxseg7:
1678 case Intrinsic::riscv_vsoxseg8:
1679 case Intrinsic::riscv_vsuxseg2:
1680 case Intrinsic::riscv_vsuxseg3:
1681 case Intrinsic::riscv_vsuxseg4:
1682 case Intrinsic::riscv_vsuxseg5:
1683 case Intrinsic::riscv_vsuxseg6:
1684 case Intrinsic::riscv_vsuxseg7:
1685 case Intrinsic::riscv_vsuxseg8:
1686 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1687 /*IsStore*/ true,
1688 /*IsUnitStrided*/ false);
1689 case Intrinsic::riscv_vssseg2_mask:
1690 case Intrinsic::riscv_vssseg3_mask:
1691 case Intrinsic::riscv_vssseg4_mask:
1692 case Intrinsic::riscv_vssseg5_mask:
1693 case Intrinsic::riscv_vssseg6_mask:
1694 case Intrinsic::riscv_vssseg7_mask:
1695 case Intrinsic::riscv_vssseg8_mask:
1696 case Intrinsic::riscv_vsoxseg2_mask:
1697 case Intrinsic::riscv_vsoxseg3_mask:
1698 case Intrinsic::riscv_vsoxseg4_mask:
1699 case Intrinsic::riscv_vsoxseg5_mask:
1700 case Intrinsic::riscv_vsoxseg6_mask:
1701 case Intrinsic::riscv_vsoxseg7_mask:
1702 case Intrinsic::riscv_vsoxseg8_mask:
1703 case Intrinsic::riscv_vsuxseg2_mask:
1704 case Intrinsic::riscv_vsuxseg3_mask:
1705 case Intrinsic::riscv_vsuxseg4_mask:
1706 case Intrinsic::riscv_vsuxseg5_mask:
1707 case Intrinsic::riscv_vsuxseg6_mask:
1708 case Intrinsic::riscv_vsuxseg7_mask:
1709 case Intrinsic::riscv_vsuxseg8_mask:
1710 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1711 /*IsStore*/ true,
1712 /*IsUnitStrided*/ false);
1713 }
1714}
1715
1717 const AddrMode &AM, Type *Ty,
1718 unsigned AS,
1719 Instruction *I) const {
1720 // No global is ever allowed as a base.
1721 if (AM.BaseGV)
1722 return false;
1723
1724 // RVV instructions only support register addressing.
1725 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1726 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1727
1728 // Require a 12-bit signed offset.
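// i.e. offsets in [-2048, 2047], matching the immediate range of scalar
// loads, stores and ADDI.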
1729 if (!isInt<12>(AM.BaseOffs))
1730 return false;
1731
1732 switch (AM.Scale) {
1733 case 0: // "r+i" or just "i", depending on HasBaseReg.
1734 break;
1735 case 1:
1736 if (!AM.HasBaseReg) // allow "r+i".
1737 break;
1738 return false; // disallow "r+r" or "r+r+i".
1739 default:
1740 return false;
1741 }
1742
1743 return true;
1744}
1745
1747 return isInt<12>(Imm);
1748}
1749
1751 return isInt<12>(Imm);
1752}
1753
1754// On RV32, 64-bit integers are split into their high and low parts and held
1755// in two different registers, so the trunc is free since the low register can
1756// just be used.
1757// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1758// isTruncateFree?
1760 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1761 return false;
1762 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1763 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1764 return (SrcBits == 64 && DestBits == 32);
1765}
1766
1768 // We consider i64->i32 free on RV64 since we have good selection of W
1769 // instructions that make promoting operations back to i64 free in many cases.
1770 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1771 !DstVT.isInteger())
1772 return false;
1773 unsigned SrcBits = SrcVT.getSizeInBits();
1774 unsigned DestBits = DstVT.getSizeInBits();
1775 return (SrcBits == 64 && DestBits == 32);
1776}
1777
1779 // Zexts are free if they can be combined with a load.
1780 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1781 // poorly with type legalization of compares preferring sext.
1782 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1783 EVT MemVT = LD->getMemoryVT();
1784 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1785 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1786 LD->getExtensionType() == ISD::ZEXTLOAD))
1787 return true;
1788 }
1789
1790 return TargetLowering::isZExtFree(Val, VT2);
1791}
1792
1794 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1795}
1796
1798 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1799}
1800
1802 return Subtarget.hasStdExtZbb();
1803}
1804
1806 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1807}
1808
1810 const Instruction &AndI) const {
1811 // We expect to be able to match a bit extraction instruction if the Zbs
1812 // extension is supported and the mask is a power of two. However, we
1813 // conservatively return false if the mask would fit in an ANDI instruction,
1814 // on the basis that it's possible the sinking+duplication of the AND in
1815 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1816 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1817 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1818 return false;
1819 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1820 if (!Mask)
1821 return false;
1822 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1823}
1824
1826 EVT VT = Y.getValueType();
1827
1828 // FIXME: Support vectors once we have tests.
1829 if (VT.isVector())
1830 return false;
1831
1832 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1833 !isa<ConstantSDNode>(Y);
1834}
1835
1837 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1838 if (Subtarget.hasStdExtZbs())
1839 return X.getValueType().isScalarInteger();
1840 auto *C = dyn_cast<ConstantSDNode>(Y);
1841 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1842 if (Subtarget.hasVendorXTHeadBs())
1843 return C != nullptr;
1844 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
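// Bit positions 0-10 produce masks of at most 0x400, which fit in ANDI's
// 12-bit signed immediate; higher bit positions would need the mask to be
// materialized separately.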
1845 return C && C->getAPIntValue().ule(10);
1846}
1847
1849 EVT VT) const {
1850 // Only enable for rvv.
1851 if (!VT.isVector() || !Subtarget.hasVInstructions())
1852 return false;
1853
1854 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1855 return false;
1856
1857 return true;
1858}
1859
1861 Type *Ty) const {
1862 assert(Ty->isIntegerTy());
1863
1864 unsigned BitSize = Ty->getIntegerBitWidth();
1865 if (BitSize > Subtarget.getXLen())
1866 return false;
1867
1868 // Fast path, assume 32-bit immediates are cheap.
1869 int64_t Val = Imm.getSExtValue();
1870 if (isInt<32>(Val))
1871 return true;
1872
1873 // A constant pool entry may be more aligned than the load we're trying to
1874 // replace. If we don't support unaligned scalar mem, prefer the constant
1875 // pool.
1876 // TODO: Can the caller pass down the alignment?
1877 if (!Subtarget.enableUnalignedScalarMem())
1878 return true;
1879
1880 // Prefer to keep the load if it would require many instructions.
1881 // This uses the same threshold we use for constant pools but doesn't
1882 // check useConstantPoolForLargeInts.
1883 // TODO: Should we keep the load only when we're definitely going to emit a
1884 // constant pool?
1885
1887 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1888}
1889
1893 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1894 SelectionDAG &DAG) const {
1895 // One interesting pattern that we'd want to form is 'bit extract':
1896 // ((1 >> Y) & 1) ==/!= 0
1897 // But we also need to be careful not to try to reverse that fold.
1898
1899 // Is this '((1 >> Y) & 1)'?
1900 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1901 return false; // Keep the 'bit extract' pattern.
1902
1903 // Will this be '((1 >> Y) & 1)' after the transform?
1904 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1905 return true; // Do form the 'bit extract' pattern.
1906
1907 // If 'X' is a constant, and we transform, then we will immediately
1908 // try to undo the fold, thus causing an endless combine loop.
1909 // So only do the transform if X is not a constant. This matches the default
1910 // implementation of this function.
1911 return !XC;
1912}
1913
1914bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1915 switch (Opcode) {
1916 case Instruction::Add:
1917 case Instruction::Sub:
1918 case Instruction::Mul:
1919 case Instruction::And:
1920 case Instruction::Or:
1921 case Instruction::Xor:
1922 case Instruction::FAdd:
1923 case Instruction::FSub:
1924 case Instruction::FMul:
1925 case Instruction::FDiv:
1926 case Instruction::ICmp:
1927 case Instruction::FCmp:
1928 return true;
1929 case Instruction::Shl:
1930 case Instruction::LShr:
1931 case Instruction::AShr:
1932 case Instruction::UDiv:
1933 case Instruction::SDiv:
1934 case Instruction::URem:
1935 case Instruction::SRem:
1936 return Operand == 1;
1937 default:
1938 return false;
1939 }
1940}
1941
1942
1944 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1945 return false;
1946
1947 if (canSplatOperand(I->getOpcode(), Operand))
1948 return true;
1949
1950 auto *II = dyn_cast<IntrinsicInst>(I);
1951 if (!II)
1952 return false;
1953
1954 switch (II->getIntrinsicID()) {
1955 case Intrinsic::fma:
1956 case Intrinsic::vp_fma:
1957 return Operand == 0 || Operand == 1;
1958 case Intrinsic::vp_shl:
1959 case Intrinsic::vp_lshr:
1960 case Intrinsic::vp_ashr:
1961 case Intrinsic::vp_udiv:
1962 case Intrinsic::vp_sdiv:
1963 case Intrinsic::vp_urem:
1964 case Intrinsic::vp_srem:
1965 return Operand == 1;
1966 // These intrinsics are commutative.
1967 case Intrinsic::vp_add:
1968 case Intrinsic::vp_mul:
1969 case Intrinsic::vp_and:
1970 case Intrinsic::vp_or:
1971 case Intrinsic::vp_xor:
1972 case Intrinsic::vp_fadd:
1973 case Intrinsic::vp_fmul:
1974 case Intrinsic::vp_icmp:
1975 case Intrinsic::vp_fcmp:
1976 // These intrinsics have 'vr' versions.
1977 case Intrinsic::vp_sub:
1978 case Intrinsic::vp_fsub:
1979 case Intrinsic::vp_fdiv:
1980 return Operand == 0 || Operand == 1;
1981 default:
1982 return false;
1983 }
1984}
1985
1986/// Check if sinking \p I's operands to I's basic block is profitable, because
1987/// the operands can be folded into a target instruction, e.g.
1988/// splats of scalars can fold into vector instructions.
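/// For example, a scalar splatted with insertelement + shufflevector and fed
/// to a vector multiply can be sunk next to the multiply, letting isel select
/// a .vx form instead of keeping a vector splat live across blocks.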
1990 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1991 using namespace llvm::PatternMatch;
1992
1993 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1994 return false;
1995
1996 for (auto OpIdx : enumerate(I->operands())) {
1997 if (!canSplatOperand(I, OpIdx.index()))
1998 continue;
1999
2000 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2001 // Make sure we are not already sinking this operand
2002 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2003 continue;
2004
2005 // We are looking for a splat that can be sunk.
2007 m_Undef(), m_ZeroMask())))
2008 continue;
2009
2010 // Don't sink i1 splats.
2011 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2012 continue;
2013
2014 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2015 // and vector registers.
2016 for (Use &U : Op->uses()) {
2017 Instruction *Insn = cast<Instruction>(U.getUser());
2018 if (!canSplatOperand(Insn, U.getOperandNo()))
2019 return false;
2020 }
2021
2022 Ops.push_back(&Op->getOperandUse(0));
2023 Ops.push_back(&OpIdx.value());
2024 }
2025 return true;
2026}
2027
2029 unsigned Opc = VecOp.getOpcode();
2030
2031 // Assume target opcodes can't be scalarized.
2032 // TODO - do we have any exceptions?
2033 if (Opc >= ISD::BUILTIN_OP_END)
2034 return false;
2035
2036 // If the vector op is not supported, try to convert to scalar.
2037 EVT VecVT = VecOp.getValueType();
2038 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2039 return true;
2040
2041 // If the vector op is supported, but the scalar op is not, the transform may
2042 // not be worthwhile.
2043 // Permit a vector binary operation to be converted to a scalar binary
2044 // operation which is custom lowered with an illegal type.
2045 EVT ScalarVT = VecVT.getScalarType();
2046 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2047 isOperationCustom(Opc, ScalarVT);
2048}
2049
2051 const GlobalAddressSDNode *GA) const {
2052 // In order to maximise the opportunity for common subexpression elimination,
2053 // keep a separate ADD node for the global address offset instead of folding
2054 // it in the global address node. Later peephole optimisations may choose to
2055 // fold it back in when profitable.
2056 return false;
2057}
2058
2059// Return one of the following:
2060// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2061// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2062// positive counterpart, which will be materialized from the first returned
2063// element. The second returned element indicates that the result should be
2064// followed by an FNEG.
2065// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
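// For example, if +2.0 is FLI-encodable for the requested type, then +2.0
// yields `{index, false}` while -2.0 yields `{index of +2.0, true}`, i.e. an
// FLI followed by an FNEG.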
2066std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2067 EVT VT) const {
2068 if (!Subtarget.hasStdExtZfa())
2069 return std::make_pair(-1, false);
2070
2071 bool IsSupportedVT = false;
2072 if (VT == MVT::f16) {
2073 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2074 } else if (VT == MVT::f32) {
2075 IsSupportedVT = true;
2076 } else if (VT == MVT::f64) {
2077 assert(Subtarget.hasStdExtD() && "Expect D extension");
2078 IsSupportedVT = true;
2079 }
2080
2081 if (!IsSupportedVT)
2082 return std::make_pair(-1, false);
2083
2085 if (Index < 0 && Imm.isNegative())
2086 // Try the combination of its positive counterpart + FNEG.
2087 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2088 else
2089 return std::make_pair(Index, false);
2090}
2091
2093 bool ForCodeSize) const {
2094 bool IsLegalVT = false;
2095 if (VT == MVT::f16)
2096 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
2097 else if (VT == MVT::f32)
2098 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2099 else if (VT == MVT::f64)
2100 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2101 else if (VT == MVT::bf16)
2102 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2103
2104 if (!IsLegalVT)
2105 return false;
2106
2107 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2108 return true;
2109
2110 // Cannot create a 64-bit floating-point immediate value for RV32.
2111 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2112 // td can handle +0.0 or -0.0 already.
2113 // -0.0 can be created by fmv + fneg.
2114 return Imm.isZero();
2115 }
2116
2117 // Special case: fmv + fneg
2118 if (Imm.isNegZero())
2119 return true;
2120
2121 // Building an integer and then converting requires a fmv at the end of
2122 // the integer sequence.
2123 const int Cost =
2124 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2125 Subtarget);
2126 return Cost <= FPImmCost;
2127}
2128
2129// TODO: This is very conservative.
2131 unsigned Index) const {
2133 return false;
2134
2135 // Only support extracting a fixed from a fixed vector for now.
2136 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2137 return false;
2138
2139 unsigned ResElts = ResVT.getVectorNumElements();
2140 unsigned SrcElts = SrcVT.getVectorNumElements();
2141
2142 // Conservatively only handle extracting half of a vector.
2143 // TODO: Relax this.
2144 if ((ResElts * 2) != SrcElts)
2145 return false;
2146
2147 // The smallest type we can slide is i8.
2148 // TODO: We can extract index 0 from a mask vector without a slide.
2149 if (ResVT.getVectorElementType() == MVT::i1)
2150 return false;
2151
2152 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2153 // cheap.
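// vslidedown.vi only encodes a 5-bit unsigned immediate, hence the
// Index < 32 check below.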
2154 if (Index >= 32)
2155 return false;
2156
2157 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2158 // the upper half of a vector until we have more test coverage.
2159 return Index == 0 || Index == ResElts;
2160}
2161
2164 EVT VT) const {
2165 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2166 // We might still end up using a GPR but that will be decided based on ABI.
2167 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2169 return MVT::f32;
2170
2172
2173 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2174 return MVT::i64;
2175
2176 return PartVT;
2177}
2178
2181 EVT VT) const {
2182 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2183 // We might still end up using a GPR but that will be decided based on ABI.
2184 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2186 return 1;
2187
2189}
2190
2192 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2193 unsigned &NumIntermediates, MVT &RegisterVT) const {
2195 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2196
2197 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2198 IntermediateVT = MVT::i64;
2199
2200 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2201 RegisterVT = MVT::i64;
2202
2203 return NumRegs;
2204}
2205
2206// Changes the condition code and swaps operands if necessary, so the SetCC
2207// operation matches one of the comparisons supported directly by branches
2208// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2209// with 1/-1.
2210static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2211 ISD::CondCode &CC, SelectionDAG &DAG) {
2212 // If this is a single bit test that can't be handled by ANDI, shift the
2213 // bit to be tested to the MSB and perform a signed compare with 0.
2214 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2215 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2216 isa<ConstantSDNode>(LHS.getOperand(1))) {
2217 uint64_t Mask = LHS.getConstantOperandVal(1);
2218 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2219 unsigned ShAmt = 0;
2220 if (isPowerOf2_64(Mask)) {
2221 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2222 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2223 } else {
2224 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2225 }
2226
2227 LHS = LHS.getOperand(0);
2228 if (ShAmt != 0)
2229 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2230 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2231 return;
2232 }
2233 }
2234
2235 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2236 int64_t C = RHSC->getSExtValue();
2237 switch (CC) {
2238 default: break;
2239 case ISD::SETGT:
2240 // Convert X > -1 to X >= 0.
2241 if (C == -1) {
2242 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2243 CC = ISD::SETGE;
2244 return;
2245 }
2246 break;
2247 case ISD::SETLT:
2248 // Convert X < 1 to 0 >= X.
2249 if (C == 1) {
2250 RHS = LHS;
2251 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2252 CC = ISD::SETGE;
2253 return;
2254 }
2255 break;
2256 }
2257 }
2258
2259 switch (CC) {
2260 default:
2261 break;
2262 case ISD::SETGT:
2263 case ISD::SETLE:
2264 case ISD::SETUGT:
2265 case ISD::SETULE:
2266 CC = ISD::getSetCCSwappedOperands(CC);
2267 std::swap(LHS, RHS);
2268 break;
2269 }
2270}
2271
2272RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2273 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2274 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2275 if (VT.getVectorElementType() == MVT::i1)
2276 KnownSize *= 8;
2277
2278 switch (KnownSize) {
2279 default:
2280 llvm_unreachable("Invalid LMUL.");
2281 case 8:
2282 return RISCVII::VLMUL::LMUL_F8;
2283 case 16:
2284 return RISCVII::VLMUL::LMUL_F4;
2285 case 32:
2286 return RISCVII::VLMUL::LMUL_F2;
2287 case 64:
2288 return RISCVII::VLMUL::LMUL_1;
2289 case 128:
2290 return RISCVII::VLMUL::LMUL_2;
2291 case 256:
2292 return RISCVII::VLMUL::LMUL_4;
2293 case 512:
2294 return RISCVII::VLMUL::LMUL_8;
2295 }
2296}
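// For example, nxv1i8 has a known minimum size of 8 bits and maps to LMUL_F8,
// while nxv4i32 (128 bits) maps to LMUL_2; i1 vectors are scaled by 8 first,
// so nxv8i1 also selects LMUL_1.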
2297
2298static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2299 switch (LMul) {
2300 default:
2301 llvm_unreachable("Invalid LMUL.");
2302 case RISCVII::VLMUL::LMUL_F8:
2303 case RISCVII::VLMUL::LMUL_F4:
2304 case RISCVII::VLMUL::LMUL_F2:
2305 case RISCVII::VLMUL::LMUL_1:
2306 return RISCV::VRRegClassID;
2307 case RISCVII::VLMUL::LMUL_2:
2308 return RISCV::VRM2RegClassID;
2309 case RISCVII::VLMUL::LMUL_4:
2310 return RISCV::VRM4RegClassID;
2311 case RISCVII::VLMUL::LMUL_8:
2312 return RISCV::VRM8RegClassID;
2313 }
2314}
2315
2316unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2317 RISCVII::VLMUL LMUL = getLMUL(VT);
2318 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2319 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2320 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2321 LMUL == RISCVII::VLMUL::LMUL_1) {
2322 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2323 "Unexpected subreg numbering");
2324 return RISCV::sub_vrm1_0 + Index;
2325 }
2326 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2327 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2328 "Unexpected subreg numbering");
2329 return RISCV::sub_vrm2_0 + Index;
2330 }
2331 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2332 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2333 "Unexpected subreg numbering");
2334 return RISCV::sub_vrm4_0 + Index;
2335 }
2336 llvm_unreachable("Invalid vector type.");
2337}
2338
2339unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2340 if (VT.getVectorElementType() == MVT::i1)
2341 return RISCV::VRRegClassID;
2342 return getRegClassIDForLMUL(getLMUL(VT));
2343}
2344
2345// Attempt to decompose a subvector insert/extract between VecVT and
2346// SubVecVT via subregister indices. Returns the subregister index that
2347// can perform the subvector insert/extract with the given element index, as
2348// well as the index corresponding to any leftover subvectors that must be
2349// further inserted/extracted within the register class for SubVecVT.
2350std::pair<unsigned, unsigned>
2351RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2352 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2353 const RISCVRegisterInfo *TRI) {
2354 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2355 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2356 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2357 "Register classes not ordered");
2358 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2359 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2360 // Try to compose a subregister index that takes us from the incoming
2361 // LMUL>1 register class down to the outgoing one. At each step we halve
2362 // the LMUL:
2363 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2364 // Note that this is not guaranteed to find a subregister index, such as
2365 // when we are extracting from one VR type to another.
2366 unsigned SubRegIdx = RISCV::NoSubRegister;
2367 for (const unsigned RCID :
2368 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2369 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2370 VecVT = VecVT.getHalfNumVectorElementsVT();
2371 bool IsHi =
2372 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2373 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2374 getSubregIndexByMVT(VecVT, IsHi));
2375 if (IsHi)
2376 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2377 }
2378 return {SubRegIdx, InsertExtractIdx};
2379}
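// Worked example for the case mentioned above: for VecVT = nxv16i32,
// SubVecVT = nxv2i32 and InsertExtractIdx = 12, the loop visits VRM4, VRM2 and
// VR, composing sub_vrm4_1, then sub_vrm2_1, then sub_vrm1_0, and the leftover
// element index returned alongside it is 0.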
2380
2381// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2382// stores for those types.
2383bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2384 return !Subtarget.useRVVForFixedLengthVectors() ||
2385 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2386}
2387
2388bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2389 if (!ScalarTy.isSimple())
2390 return false;
2391 switch (ScalarTy.getSimpleVT().SimpleTy) {
2392 case MVT::iPTR:
2393 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2394 case MVT::i8:
2395 case MVT::i16:
2396 case MVT::i32:
2397 return true;
2398 case MVT::i64:
2399 return Subtarget.hasVInstructionsI64();
2400 case MVT::f16:
2401 return Subtarget.hasVInstructionsF16();
2402 case MVT::f32:
2403 return Subtarget.hasVInstructionsF32();
2404 case MVT::f64:
2405 return Subtarget.hasVInstructionsF64();
2406 default:
2407 return false;
2408 }
2409}
2410
2411
2412unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2413 return NumRepeatedDivisors;
2414}
2415
2416static SDValue getVLOperand(SDValue Op) {
2417 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2418 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2419 "Unexpected opcode");
2420 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2421 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2422 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2423 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2424 if (!II)
2425 return SDValue();
2426 return Op.getOperand(II->VLOperand + 1 + HasChain);
2427}
2428
2429static bool useRVVForFixedLengthVectorVT(MVT VT,
2430 const RISCVSubtarget &Subtarget) {
2431 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2432 if (!Subtarget.useRVVForFixedLengthVectors())
2433 return false;
2434
2435 // We only support a set of vector types with a consistent maximum fixed size
2436 // across all supported vector element types to avoid legalization issues.
2437 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2438 // fixed-length vector type we support is 1024 bytes.
2439 if (VT.getFixedSizeInBits() > 1024 * 8)
2440 return false;
2441
2442 unsigned MinVLen = Subtarget.getRealMinVLen();
2443
2444 MVT EltVT = VT.getVectorElementType();
2445
2446 // Don't use RVV for vectors we cannot scalarize if required.
2447 switch (EltVT.SimpleTy) {
2448 // i1 is supported but has different rules.
2449 default:
2450 return false;
2451 case MVT::i1:
2452 // Masks can only use a single register.
2453 if (VT.getVectorNumElements() > MinVLen)
2454 return false;
2455 MinVLen /= 8;
2456 break;
2457 case MVT::i8:
2458 case MVT::i16:
2459 case MVT::i32:
2460 break;
2461 case MVT::i64:
2462 if (!Subtarget.hasVInstructionsI64())
2463 return false;
2464 break;
2465 case MVT::f16:
2466 if (!Subtarget.hasVInstructionsF16Minimal())
2467 return false;
2468 break;
2469 case MVT::f32:
2470 if (!Subtarget.hasVInstructionsF32())
2471 return false;
2472 break;
2473 case MVT::f64:
2474 if (!Subtarget.hasVInstructionsF64())
2475 return false;
2476 break;
2477 }
2478
2479 // Reject elements larger than ELEN.
2480 if (EltVT.getSizeInBits() > Subtarget.getELen())
2481 return false;
2482
2483 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2484 // Don't use RVV for types that don't fit.
2485 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2486 return false;
2487
2488 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2489 // the base fixed length RVV support in place.
2490 if (!VT.isPow2VectorType())
2491 return false;
2492
2493 return true;
2494}
2495
2496bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2497 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2498}
2499
2500// Return the largest legal scalable vector type that matches VT's element type.
2501static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2502 const RISCVSubtarget &Subtarget) {
2503 // This may be called before legal types are setup.
2504 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2505 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2506 "Expected legal fixed length vector!");
2507
2508 unsigned MinVLen = Subtarget.getRealMinVLen();
2509 unsigned MaxELen = Subtarget.getELen();
2510
2511 MVT EltVT = VT.getVectorElementType();
2512 switch (EltVT.SimpleTy) {
2513 default:
2514 llvm_unreachable("unexpected element type for RVV container");
2515 case MVT::i1:
2516 case MVT::i8:
2517 case MVT::i16:
2518 case MVT::i32:
2519 case MVT::i64:
2520 case MVT::f16:
2521 case MVT::f32:
2522 case MVT::f64: {
2523 // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2524 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2525 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2526 unsigned NumElts =
2527 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2528 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2529 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2530 return MVT::getScalableVectorVT(EltVT, NumElts);
2531 }
2532 }
2533}
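// For example, assuming a minimum VLEN of 128 and ELEN of 64: v4i32 is
// contained in nxv2i32 (one LMUL=1 register's worth of data), v16i32 in
// nxv8i32 (LMUL=4), and a narrow type like v2i8 in the fractional container
// nxv1i8.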
2534
2536 const RISCVSubtarget &Subtarget) {
2538 Subtarget);
2539}
2540
2542 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2543}
2544
2545// Grow V to consume an entire RVV register.
2546static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2547 const RISCVSubtarget &Subtarget) {
2548 assert(VT.isScalableVector() &&
2549 "Expected to convert into a scalable vector!");
2550 assert(V.getValueType().isFixedLengthVector() &&
2551 "Expected a fixed length vector operand!");
2552 SDLoc DL(V);
2553 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2554 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2555}
2556
2557// Shrink V so it's just big enough to maintain a VT's worth of data.
2558static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2559 const RISCVSubtarget &Subtarget) {
2560 assert(VT.isFixedLengthVector() &&
2561 "Expected to convert into a fixed length vector!");
2562 assert(V.getValueType().isScalableVector() &&
2563 "Expected a scalable vector operand!");
2564 SDLoc DL(V);
2565 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2566 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2567}
2568
2569/// Return the mask type suitable for masking the provided vector type.
2570/// This is simply an i1-element vector of the same (possibly scalable)
2571/// length.
2572static MVT getMaskTypeFor(MVT VecVT) {
2573 assert(VecVT.isVector());
2575 return MVT::getVectorVT(MVT::i1, EC);
2576}
2577
2578/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2579/// vector length VL.
2580static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2581 SelectionDAG &DAG) {
2582 MVT MaskVT = getMaskTypeFor(VecVT);
2583 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2584}
2585
2586static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2587 const RISCVSubtarget &Subtarget) {
2588 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2589}
2590
2591static std::pair<SDValue, SDValue>
2593 const RISCVSubtarget &Subtarget) {
2594 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2595 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2596 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2597 return {Mask, VL};
2598}
2599
2600static std::pair<SDValue, SDValue>
2601getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2602 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2603 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2604 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2605 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2606 return {Mask, VL};
2607}
2608
2609// Gets the two common "VL" operands: an all-ones mask and the vector length.
2610// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2611// fixed-length, ContainerVT is the scalable container type it is lowered to;
2612// if VecVT is scalable, ContainerVT should be the same as VecVT.
2613static std::pair<SDValue, SDValue>
2614getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2615 const RISCVSubtarget &Subtarget) {
2616 if (VecVT.isFixedLengthVector())
2617 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2618 Subtarget);
2619 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2620 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2621}
2622
2624 SelectionDAG &DAG) const {
2625 assert(VecVT.isScalableVector() && "Expected scalable vector");
2626 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2627 VecVT.getVectorElementCount());
2628}
2629
2630// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
2631// little of either is (currently) supported. This can get us into an infinite loop
2632// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2633// as a ..., etc.
2634// Until either (or both) of these can reliably lower any node, reporting that
2635// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2636// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2637// which is not desirable.
2639 EVT VT, unsigned DefinedValues) const {
2640 return false;
2641}
2642
2644 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2645 // actually implementation-defined.
2646 if (!VT.isVector())
2648 unsigned DLenFactor = Subtarget.getDLenFactor();
2649 unsigned Cost;
2650 if (VT.isScalableVector()) {
2651 unsigned LMul;
2652 bool Fractional;
2653 std::tie(LMul, Fractional) =
2655 if (Fractional)
2656 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2657 else
2658 Cost = (LMul * DLenFactor);
2659 } else {
2660 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2661 }
2662 return Cost;
2663}
2664
2665
2666/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2667/// is generally quadratic in the number of vregs implied by LMUL. Note that
2668/// the operands (index and possibly mask) are handled separately.
2670 return getLMULCost(VT) * getLMULCost(VT);
2671}
2672
2673/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2674/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2675/// or may track the vrgather.vv cost. It is implementation-dependent.
2677 return getLMULCost(VT);
2678}
2679
2680/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2681/// for the type VT. (This does not cover the vslide1up or vslide1down
2682/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2683/// or may track the vrgather.vv cost. It is implementation-dependent.
2685 return getLMULCost(VT);
2686}
2687
2689 const RISCVSubtarget &Subtarget) {
2690 // RISC-V FP-to-int conversions saturate to the destination register size, but
2691 // don't produce 0 for nan. We can use a conversion instruction and fix the
2692 // nan case with a compare and a select.
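// For a scalar f32 -> i32 fptosi.sat this typically ends up as something like
//   fcvt.w.s a0, fa0, rtz
//   feq.s a1, fa0, fa0
//   neg a1, a1
//   and a0, a0, a1
// though the exact sequence is up to instruction selection; the feq/and pair
// implements the NaN-to-zero select described above.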
2693 SDValue Src = Op.getOperand(0);
2694
2695 MVT DstVT = Op.getSimpleValueType();
2696 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2697
2698 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2699
2700 if (!DstVT.isVector()) {
2701 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2702 // the result.
2703 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2704 Src.getValueType() == MVT::bf16) {
2705 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2706 }
2707
2708 unsigned Opc;
2709 if (SatVT == DstVT)
2710 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2711 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2712 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2713 else
2714 return SDValue();
2715 // FIXME: Support other SatVTs by clamping before or after the conversion.
2716
2717 SDLoc DL(Op);
2718 SDValue FpToInt = DAG.getNode(
2719 Opc, DL, DstVT, Src,
2721
2722 if (Opc == RISCVISD::FCVT_WU_RV64)
2723 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2724
2725 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2726 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2728 }
2729
2730 // Vectors.
2731
2732 MVT DstEltVT = DstVT.getVectorElementType();
2733 MVT SrcVT = Src.getSimpleValueType();
2734 MVT SrcEltVT = SrcVT.getVectorElementType();
2735 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2736 unsigned DstEltSize = DstEltVT.getSizeInBits();
2737
2738 // Only handle saturating to the destination type.
2739 if (SatVT != DstEltVT)
2740 return SDValue();
2741
2742 // FIXME: Don't support narrowing by more than 1 step for now.
2743 if (SrcEltSize > (2 * DstEltSize))
2744 return SDValue();
2745
2746 MVT DstContainerVT = DstVT;
2747 MVT SrcContainerVT = SrcVT;
2748 if (DstVT.isFixedLengthVector()) {
2749 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2750 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2751 assert(DstContainerVT.getVectorElementCount() ==
2752 SrcContainerVT.getVectorElementCount() &&
2753 "Expected same element count");
2754 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2755 }
2756
2757 SDLoc DL(Op);
2758
2759 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2760
2761 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2762 {Src, Src, DAG.getCondCode(ISD::SETNE),
2763 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2764
2765 // If we need to widen by more than 1 step, promote the FP type first, then do
2766 // a widening convert.
2767 if (DstEltSize > (2 * SrcEltSize)) {
2768 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2769 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2770 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2771 }
2772
2773 unsigned RVVOpc =
2775 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2776
2777 SDValue SplatZero = DAG.getNode(
2778 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2779 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2780 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2781 Res, VL);
2782
2783 if (DstVT.isFixedLengthVector())
2784 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2785
2786 return Res;
2787}
2788
2790 switch (Opc) {
2791 case ISD::FROUNDEVEN:
2793 case ISD::VP_FROUNDEVEN:
2794 return RISCVFPRndMode::RNE;
2795 case ISD::FTRUNC:
2796 case ISD::STRICT_FTRUNC:
2797 case ISD::VP_FROUNDTOZERO:
2798 return RISCVFPRndMode::RTZ;
2799 case ISD::FFLOOR:
2800 case ISD::STRICT_FFLOOR:
2801 case ISD::VP_FFLOOR:
2802 return RISCVFPRndMode::RDN;
2803 case ISD::FCEIL:
2804 case ISD::STRICT_FCEIL:
2805 case ISD::VP_FCEIL:
2806 return RISCVFPRndMode::RUP;
2807 case ISD::FROUND:
2808 case ISD::STRICT_FROUND:
2809 case ISD::VP_FROUND:
2810 return RISCVFPRndMode::RMM;
2811 case ISD::FRINT:
2812 return RISCVFPRndMode::DYN;
2813 }
2814
2815 return RISCVFPRndMode::Invalid;
2816}
2817
2818// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2819// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2820// the integer domain and back, taking care to avoid converting values that are
2821// NaN or already correct.
2822static SDValue
2824 const RISCVSubtarget &Subtarget) {
2825 MVT VT = Op.getSimpleValueType();
2826 assert(VT.isVector() && "Unexpected type");
2827
2828 SDLoc DL(Op);
2829
2830 SDValue Src = Op.getOperand(0);
2831
2832 MVT ContainerVT = VT;
2833 if (VT.isFixedLengthVector()) {
2834 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2835 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2836 }
2837
2838 SDValue Mask, VL;
2839 if (Op->isVPOpcode()) {
2840 Mask = Op.getOperand(1);
2841 if (VT.isFixedLengthVector())
2842 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2843 Subtarget);
2844 VL = Op.getOperand(2);
2845 } else {
2846 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2847 }
2848
2849 // Freeze the source since we are increasing the number of uses.
2850 Src = DAG.getFreeze(Src);
2851
2852 // We do the conversion on the absolute value and fix the sign at the end.
2853 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2854
2855 // Determine the largest integer that can be represented exactly. This and
2856 // values larger than it don't have any fractional bits so don't need to
2857 // be converted.
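// For f32 the significand is 24 bits wide, so MaxVal is 2^23 = 8388608.0; for
// f64 it is 2^52. Any value whose magnitude is at least MaxVal is already an
// integer (or NaN) and is deliberately left untouched.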
2858 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2859 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2860 APFloat MaxVal = APFloat(FltSem);
2861 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2862 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2863 SDValue MaxValNode =
2864 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2865 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2866 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2867
2868 // If abs(Src) was larger than MaxVal or nan, keep it.
2869 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2870 Mask =
2871 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2872 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2873 Mask, Mask, VL});
2874
2875 // Truncate to integer and convert back to FP.
2876 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2877 MVT XLenVT = Subtarget.getXLenVT();
2878 SDValue Truncated;
2879
2880 switch (Op.getOpcode()) {
2881 default:
2882 llvm_unreachable("Unexpected opcode");
2883 case ISD::FCEIL:
2884 case ISD::VP_FCEIL:
2885 case ISD::FFLOOR:
2886 case ISD::VP_FFLOOR:
2887 case ISD::FROUND:
2888 case ISD::FROUNDEVEN:
2889 case ISD::VP_FROUND:
2890 case ISD::VP_FROUNDEVEN:
2891 case ISD::VP_FROUNDTOZERO: {
2894 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2895 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2896 break;
2897 }
2898 case ISD::FTRUNC:
2899 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2900 Mask, VL);
2901 break;
2902 case ISD::FRINT:
2903 case ISD::VP_FRINT:
2904 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2905 break;
2906 case ISD::FNEARBYINT:
2907 case ISD::VP_FNEARBYINT:
2908 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2909 Mask, VL);
2910 break;
2911 }
2912
2913 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2914 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2915 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2916 Mask, VL);
2917
2918 // Restore the original sign so that -0.0 is preserved.
2919 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2920 Src, Src, Mask, VL);
2921
2922 if (!VT.isFixedLengthVector())
2923 return Truncated;
2924
2925 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2926}
2927
2928// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
2929// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source to
2930// qNaNs and converting the new source to integer and back to FP.
2931static SDValue
2933 const RISCVSubtarget &Subtarget) {
2934 SDLoc DL(Op);
2935 MVT VT = Op.getSimpleValueType();
2936 SDValue Chain = Op.getOperand(0);
2937 SDValue Src = Op.getOperand(1);
2938
2939 MVT ContainerVT = VT;
2940 if (VT.isFixedLengthVector()) {
2941 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2942 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2943 }
2944
2945 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2946
2947 // Freeze the source since we are increasing the number of uses.
2948 Src = DAG.getFreeze(Src);
2949
2950 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
2951 MVT MaskVT = Mask.getSimpleValueType();
2953 DAG.getVTList(MaskVT, MVT::Other),
2954 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2955 DAG.getUNDEF(MaskVT), Mask, VL});
2956 Chain = Unorder.getValue(1);
2958 DAG.getVTList(ContainerVT, MVT::Other),
2959 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2960 Chain = Src.getValue(1);
2961
2962 // We do the conversion on the absolute value and fix the sign at the end.
2963 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2964
2965 // Determine the largest integer that can be represented exactly. This and
2966 // values larger than it don't have any fractional bits so don't need to
2967 // be converted.
2968 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2969 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2970 APFloat MaxVal = APFloat(FltSem);
2971 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2972 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2973 SDValue MaxValNode =
2974 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2975 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2976 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2977
2978 // If abs(Src) was larger than MaxVal or nan, keep it.
2979 Mask = DAG.getNode(
2980 RISCVISD::SETCC_VL, DL, MaskVT,
2981 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2982
2983 // Truncate to integer and convert back to FP.
2984 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2985 MVT XLenVT = Subtarget.getXLenVT();
2986 SDValue Truncated;
2987
2988 switch (Op.getOpcode()) {
2989 default:
2990 llvm_unreachable("Unexpected opcode");
2991 case ISD::STRICT_FCEIL:
2992 case ISD::STRICT_FFLOOR:
2993 case ISD::STRICT_FROUND:
2997 Truncated = DAG.getNode(
2998 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2999 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3000 break;
3001 }
3002 case ISD::STRICT_FTRUNC:
3003 Truncated =
3005 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3006 break;
3009 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3010 Mask, VL);
3011 break;
3012 }
3013 Chain = Truncated.getValue(1);
3014
3015 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3016 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3017 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3018 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3019 Truncated, Mask, VL);
3020 Chain = Truncated.getValue(1);
3021 }
3022
3023 // Restore the original sign so that -0.0 is preserved.
3024 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3025 Src, Src, Mask, VL);
3026
3027 if (VT.isFixedLengthVector())
3028 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3029 return DAG.getMergeValues({Truncated, Chain}, DL);
3030}
3031
3032static SDValue
3034 const RISCVSubtarget &Subtarget) {
3035 MVT VT = Op.getSimpleValueType();
3036 if (VT.isVector())
3037 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3038
3039 if (DAG.shouldOptForSize())
3040 return SDValue();
3041
3042 SDLoc DL(Op);
3043 SDValue Src = Op.getOperand(0);
3044
3045 // Create an integer the size of the mantissa with the MSB set. This and all
3046 // values larger than it don't have any fractional bits so don't need to be
3047 // converted.
3048 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3049 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3050 APFloat MaxVal = APFloat(FltSem);
3051 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3052 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3053 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3054
3056 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3057 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3058}
3059
3060// Expand vector LRINT and LLRINT by converting to the integer domain.
3062 const RISCVSubtarget &Subtarget) {
3063 MVT VT = Op.getSimpleValueType();
3064 assert(VT.isVector() && "Unexpected type");
3065
3066 SDLoc DL(Op);
3067 SDValue Src = Op.getOperand(0);
3068 MVT ContainerVT = VT;
3069
3070 if (VT.isFixedLengthVector()) {
3071 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3072 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3073 }
3074
3075 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3076 SDValue Truncated =
3077 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3078
3079 if (!VT.isFixedLengthVector())
3080 return Truncated;
3081
3082 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3083}
3084
3085static SDValue
3087 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3088 SDValue Offset, SDValue Mask, SDValue VL,
3090 if (Merge.isUndef())
3092 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3093 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3094 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3095}
3096
3097static SDValue
3098getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3100 SDValue VL,
3102 if (Merge.isUndef())
3104 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3105 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3106 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3107}
3108
3109static MVT getLMUL1VT(MVT VT) {
3111 "Unexpected vector MVT");
3115}
3116
3117struct VIDSequence {
3118 int64_t StepNumerator;
3119 unsigned StepDenominator;
3120 int64_t Addend;
3121};
3122
3123static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3125 APSInt ValInt(BitWidth, !APF.isNegative());
3126 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3127 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3128 // the rounding mode changes the output value, then it is not an exact
3129 // integer.
3131 bool IsExact;
3132 // If it is out of signed integer range, it will return an invalid operation.
3133 // If it is not an exact integer, IsExact is false.
3134 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3136 !IsExact)
3137 return std::nullopt;
3138 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3139}
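// For example, with a bit width of 32, 3.0 yields 3 and -1.0 yields
// 0xFFFFFFFF, while 2.5 (inexact) or a value outside the signed 32-bit range
// yields std::nullopt.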
3140
3141// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3142// to the (non-zero) step S and start value X. This can then be lowered as the
3143// RVV sequence (VID * S) + X, for example.
3144// The step S is represented as an integer numerator divided by a positive
3145// denominator. Note that the implementation currently only identifies
3146// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3147// cannot detect 2/3, for example.
3148// Note that this method will also match potentially unappealing index
3149// sequences, like <i32 0, i32 50939494>; it is left to the caller to
3150// determine whether this is worth generating code for.
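// For example, <0, 2, 4, 6> is matched as step 2/1 with addend 0, and
// <1, 1, 2, 2, 3, 3> as step 1/2 with addend 1, since element i equals
// (i * 1) / 2 + 1 in the latter case.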
3151static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
3152 unsigned NumElts = Op.getNumOperands();
3153 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3154 bool IsInteger = Op.getValueType().isInteger();
3155
3156 std::optional<unsigned> SeqStepDenom;
3157 std::optional<int64_t> SeqStepNum, SeqAddend;
3158 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3159 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
3160 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3161 // Assume undef elements match the sequence; we just have to be careful
3162 // when interpolating across them.
3163 if (Op.getOperand(Idx).isUndef())
3164 continue;
3165
3166 uint64_t Val;
3167 if (IsInteger) {
3168 // The BUILD_VECTOR must be all constants.
3169 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3170 return std::nullopt;
3171 Val = Op.getConstantOperandVal(Idx) &
3172 maskTrailingOnes<uint64_t>(EltSizeInBits);
3173 } else {
3174 // The BUILD_VECTOR must be all constants.
3175 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3176 return std::nullopt;
3177 if (auto ExactInteger = getExactInteger(
3178 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3179 EltSizeInBits))
3180 Val = *ExactInteger;
3181 else
3182 return std::nullopt;
3183 }
3184
3185 if (PrevElt) {
3186 // Calculate the step since the last non-undef element, and ensure
3187 // it's consistent across the entire sequence.
3188 unsigned IdxDiff = Idx - PrevElt->second;
3189 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3190
3191 // A zero value difference means that we're somewhere in the middle
3192 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3193 // step change before evaluating the sequence.
3194 if (ValDiff == 0)
3195 continue;
3196
3197 int64_t Remainder = ValDiff % IdxDiff;
3198 // Normalize the step if it's greater than 1.
3199 if (Remainder != ValDiff) {
3200 // The difference must cleanly divide the element span.
3201 if (Remainder != 0)
3202 return std::nullopt;
3203 ValDiff /= IdxDiff;
3204 IdxDiff = 1;
3205 }
3206
3207 if (!SeqStepNum)
3208 SeqStepNum = ValDiff;
3209 else if (ValDiff != SeqStepNum)
3210 return std::nullopt;
3211
3212 if (!SeqStepDenom)
3213 SeqStepDenom = IdxDiff;
3214 else if (IdxDiff != *SeqStepDenom)
3215 return std::nullopt;
3216 }
3217
3218 // Record this non-undef element for later.
3219 if (!PrevElt || PrevElt->first != Val)
3220 PrevElt = std::make_pair(Val, Idx);
3221 }
3222
3223 // We need to have logged a step for this to count as a legal index sequence.
3224 if (!SeqStepNum || !SeqStepDenom)
3225 return std::nullopt;
3226
3227 // Loop back through the sequence and validate elements we might have skipped
3228 // while waiting for a valid step. While doing this, log any sequence addend.
3229 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3230 if (Op.getOperand(Idx).isUndef())
3231 continue;
3232 uint64_t Val;
3233 if (IsInteger) {
3234 Val = Op.getConstantOperandVal(Idx) &
3235 maskTrailingOnes<uint64_t>(EltSizeInBits);
3236 } else {
3237 Val = *getExactInteger(
3238 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3239 EltSizeInBits);
3240 }
3241 uint64_t ExpectedVal =
3242 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3243 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3244 if (!SeqAddend)
3245 SeqAddend = Addend;
3246 else if (Addend != SeqAddend)
3247 return std::nullopt;
3248 }
3249
3250 assert(SeqAddend && "Must have an addend if we have a step");
3251
3252 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3253}
3254
3255// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3256// and lower it as a VRGATHER_VX_VL from the source vector.
3257static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3258 SelectionDAG &DAG,
3259 const RISCVSubtarget &Subtarget) {
3260 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3261 return SDValue();
3262 SDValue Vec = SplatVal.getOperand(0);
3263 // Only perform this optimization on vectors of the same size for simplicity.
3264 // Don't perform this optimization for i1 vectors.
3265 // FIXME: Support i1 vectors, maybe by promoting to i8?
3266 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3267 return SDValue();
3268 SDValue Idx = SplatVal.getOperand(1);
3269 // The index must be a legal type.
3270 if (Idx.getValueType() != Subtarget.getXLenVT())
3271 return SDValue();
3272
3273 MVT ContainerVT = VT;
3274 if (VT.isFixedLengthVector()) {
3275 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3276 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3277 }
3278
3279 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3280
3281 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3282 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3283
3284 if (!VT.isFixedLengthVector())
3285 return Gather;
3286
3287 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3288}
3289
3290
3291/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3292/// which constitute a large proportion of the elements. In such cases we can
3293/// splat a vector with the dominant element and make up the shortfall with
3294/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3295/// Note that this includes vectors of 2 elements by association. The
3296/// upper-most element is the "dominant" one, allowing us to use a splat to
3297/// "insert" the upper element, and an insert of the lower element at position
3298/// 0, which improves codegen.
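/// For example, <2, 2, 2, 2, 3, 2, 2, 2> can be lowered as a splat of 2
/// followed by a single insert of 3 at index 4, rather than eight separate
/// element insertions.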
3300 const RISCVSubtarget &Subtarget) {
3301 MVT VT = Op.getSimpleValueType();
3302 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3303
3304 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3305
3306 SDLoc DL(Op);
3307 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3308
3309 MVT XLenVT = Subtarget.getXLenVT();
3310 unsigned NumElts = Op.getNumOperands();
3311
3312 SDValue DominantValue;
3313 unsigned MostCommonCount = 0;
3314 DenseMap<SDValue, unsigned> ValueCounts;
3315 unsigned NumUndefElts =
3316 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3317
3318 // Track the number of scalar loads we know we'd be inserting, estimated as
3319 // any non-zero floating-point constant. Other kinds of element are either
3320 // already in registers or are materialized on demand. The threshold at which
3321 // a vector load is more desirable than several scalar materialization and
3322 // vector-insertion instructions is not known.
3323 unsigned NumScalarLoads = 0;
3324
3325 for (SDValue V : Op->op_values()) {
3326 if (V.isUndef())
3327 continue;
3328
3329 ValueCounts.insert(std::make_pair(V, 0));
3330 unsigned &Count = ValueCounts[V];
3331 if (0 == Count)
3332 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3333 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3334
3335 // Is this value dominant? In case of a tie, prefer the highest element as
3336 // it's cheaper to insert near the beginning of a vector than it is at the
3337 // end.
3338 if (++Count >= MostCommonCount) {
3339 DominantValue = V;
3340 MostCommonCount = Count;
3341 }
3342 }
3343
3344 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3345 unsigned NumDefElts = NumElts - NumUndefElts;
3346 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3347
3348 // Don't perform this optimization when optimizing for size, since
3349 // materializing elements and inserting them tends to cause code bloat.
3350 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3351 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3352 ((MostCommonCount > DominantValueCountThreshold) ||
3353 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3354 // Start by splatting the most common element.
3355 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3356
3357 DenseSet<SDValue> Processed{DominantValue};
3358
3359 // We can handle an insert into the last element (of a splat) via
3360 // v(f)slide1down. This is slightly better than the vslideup insert
3361 // lowering as it avoids the need for a vector group temporary. It
3362 // is also better than using vmerge.vx as it avoids the need to
3363 // materialize the mask in a vector register.
3364 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3365 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3366 LastOp != DominantValue) {
3367 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3368 auto OpCode =
3370 if (!VT.isFloatingPoint())
3371 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3372 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3373 LastOp, Mask, VL);
3374 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3375 Processed.insert(LastOp);
3376 }
3377
3378 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3379 for (const auto &OpIdx : enumerate(Op->ops())) {
3380 const SDValue &V = OpIdx.value();
3381 if (V.isUndef() || !Processed.insert(V).second)
3382 continue;
3383 if (ValueCounts[V] == 1) {
3384 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3385 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3386 } else {
3387 // Blend in all instances of this value using a VSELECT, using a
3388 // mask where each bit signals whether that element is the one
3389 // we're after.
3391 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3392 return DAG.getConstant(V == V1, DL, XLenVT);
3393 });
3394 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3395 DAG.getBuildVector(SelMaskTy, DL, Ops),
3396 DAG.getSplatBuildVector(VT, DL, V), Vec);
3397 }
3398 }
3399
3400 return Vec;
3401 }
3402
3403 return SDValue();
3404}
3405
3407 const RISCVSubtarget &Subtarget) {
3408 MVT VT = Op.getSimpleValueType();
3409 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3410
3411 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3412
3413 SDLoc DL(Op);
3414 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3415
3416 MVT XLenVT = Subtarget.getXLenVT();
3417 unsigned NumElts = Op.getNumOperands();
3418
3419 if (VT.getVectorElementType() == MVT::i1) {
3420 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3421 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3422 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3423 }
3424
3425 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3426 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3427 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3428 }
3429
3430 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3431 // scalar integer chunks whose bit-width depends on the number of mask
3432 // bits and XLEN.
3433 // First, determine the most appropriate scalar integer type to use. This
3434 // is at most XLenVT, but may be shrunk to a smaller vector element type
3435 // according to the size of the final vector - use i8 chunks rather than
3436 // XLenVT if we're producing a v8i1. This results in more consistent
3437 // codegen across RV32 and RV64.
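// For example, a constant v8i1 mask <1,0,1,1,0,0,0,1> is packed into the
// single i8 value 0x8d (bit i holds element i) and then bitcast to v8i1.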
3438 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3439 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3440 // If we have to use more than one INSERT_VECTOR_ELT then this
3441 // optimization is likely to increase code size; avoid performing it in
3442 // such a case. We can use a load from a constant pool in this case.
3443 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3444 return SDValue();
3445 // Now we can create our integer vector type. Note that it may be larger
3446 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3447 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3448 MVT IntegerViaVecVT =
3449 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3450 IntegerViaVecElts);
3451
3452 uint64_t Bits = 0;
3453 unsigned BitPos = 0, IntegerEltIdx = 0;
3454 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3455
3456 for (unsigned I = 0; I < NumElts;) {
3457 SDValue V = Op.getOperand(I);
3458 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3459 Bits |= ((uint64_t)BitValue << BitPos);
3460 ++BitPos;
3461 ++I;
3462
3463 // Once we accumulate enough bits to fill our scalar type or process the
3464 // last element, insert into our vector and clear our accumulated data.
3465 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3466 if (NumViaIntegerBits <= 32)
3467 Bits = SignExtend64<32>(Bits);
3468 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3469 Elts[IntegerEltIdx] = Elt;
3470 Bits = 0;
3471 BitPos = 0;
3472 IntegerEltIdx++;
3473 }
3474 }
3475
3476 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3477
3478 if (NumElts < NumViaIntegerBits) {
3479 // If we're producing a smaller vector than our minimum legal integer
3480 // type, bitcast to the equivalent (known-legal) mask type, and extract
3481 // our final mask.
3482 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3483 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3484 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3485 DAG.getConstant(0, DL, XLenVT));
3486 } else {
3487 // Else we must have produced an integer type with the same size as the
3488 // mask type; bitcast for the final result.
3489 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3490 Vec = DAG.getBitcast(VT, Vec);
3491 }
3492
3493 return Vec;
3494 }
3495
3496 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3497 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3499 if (!VT.isFloatingPoint())
3500 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3501 Splat =
3502 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3503 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3504 }
3505
3506 // Try and match index sequences, which we can lower to the vid instruction
3507 // with optional modifications. An all-undef vector is matched by
3508 // getSplatValue, above.
3509 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3510 int64_t StepNumerator = SimpleVID->StepNumerator;
3511 unsigned StepDenominator = SimpleVID->StepDenominator;
3512 int64_t Addend = SimpleVID->Addend;
3513
3514 assert(StepNumerator != 0 && "Invalid step");
3515 bool Negate = false;
3516 int64_t SplatStepVal = StepNumerator;
3517 unsigned StepOpcode = ISD::MUL;
3518 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3519 // anyway as the shift of 63 won't fit in uimm5.
3520 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3521 isPowerOf2_64(std::abs(StepNumerator))) {
3522 Negate = StepNumerator < 0;
3523 StepOpcode = ISD::SHL;
3524 SplatStepVal = Log2_64(std::abs(StepNumerator));
3525 }
3526
3527 // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3528 // threshold since it's the immediate value many RVV instructions accept.
3529 // There is no vmul.vi instruction, so ensure the multiply constant can be
3530 // materialized with a single addi instruction.
3531 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3532 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3533 isPowerOf2_32(StepDenominator) &&
3534 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3535 MVT VIDVT =
3537 MVT VIDContainerVT =
3538 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3539 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3540 // Convert right out of the scalable type so we can use standard ISD
3541 // nodes for the rest of the computation. If we used scalable types with
3542 // these, we'd lose the fixed-length vector info and generate worse
3543 // vsetvli code.
3544 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3545 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3546 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3547 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3548 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3549 }
3550 if (StepDenominator != 1) {
3551 SDValue SplatStep =
3552 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3553 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3554 }
3555 if (Addend != 0 || Negate) {
3556 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3557 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3558 VID);
3559 }
3560 if (VT.isFloatingPoint()) {
3561 // TODO: Use vfwcvt to reduce register pressure.
3562 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3563 }
3564 return VID;
3565 }
3566 }
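// E.g. the BUILD_VECTOR <0, 2, 4, 6> becomes roughly vid.v followed by a
// vsll.vi with shift amount 1, and <1, 1, 2, 2> becomes vid.v, vsrl.vi by 1,
// then vadd.vi with 1.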
3567
3568 // For very small build_vectors, use a single scalar insert of a constant.
3569 // TODO: Base this on constant rematerialization cost, not size.
3570 const unsigned EltBitSize = VT.getScalarSizeInBits();
3571 if (VT.getSizeInBits() <= 32 &&
3573 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3574 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3575 "Unexpected sequence type");
3576 // If we can use the original VL with the modified element type, this
3577 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3578 // be moved into InsertVSETVLI?
3579 unsigned ViaVecLen =
3580 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3581 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3582
3583 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3584 uint64_t SplatValue = 0;
3585 // Construct the amalgamated value at this larger vector type.
3586 for (const auto &OpIdx : enumerate(Op->op_values())) {
3587 const auto &SeqV = OpIdx.value();
3588 if (!SeqV.isUndef())
3589 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3590 << (OpIdx.index() * EltBitSize));
3591 }
3592
3593 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3594 // achieve better constant materialization.
3595 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3596 SplatValue = SignExtend64<32>(SplatValue);
3597
3598 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3599 DAG.getUNDEF(ViaVecVT),
3600 DAG.getConstant(SplatValue, DL, XLenVT),
3601 DAG.getConstant(0, DL, XLenVT));
3602 if (ViaVecLen != 1)
3604 MVT::getVectorVT(ViaIntVT, 1), Vec,
3605 DAG.getConstant(0, DL, XLenVT));
3606 return DAG.getBitcast(VT, Vec);
3607 }
3608
3609
3610 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3611 // when re-interpreted as a vector with a larger element type. For example,
3612 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3613 // could be instead splat as
3614 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3615 // TODO: This optimization could also work on non-constant splats, but it
3616 // would require bit-manipulation instructions to construct the splat value.
3617 SmallVector<SDValue> Sequence;
3618 const auto *BV = cast<BuildVectorSDNode>(Op);
3619 if (VT.isInteger() && EltBitSize < 64 &&
3621 BV->getRepeatedSequence(Sequence) &&
3622 (Sequence.size() * EltBitSize) <= 64) {
3623 unsigned SeqLen = Sequence.size();
3624 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3625 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3626 ViaIntVT == MVT::i64) &&
3627 "Unexpected sequence type");
3628
3629 // If we can use the original VL with the modified element type, this
3630 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3631 // be moved into InsertVSETVLI?
3632 const unsigned RequiredVL = NumElts / SeqLen;
3633 const unsigned ViaVecLen =
3634 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3635 NumElts : RequiredVL;
3636 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3637
3638 unsigned EltIdx = 0;
3639 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3640 uint64_t SplatValue = 0;
3641 // Construct the amalgamated value which can be splatted as this larger
3642 // vector type.
3643 for (const auto &SeqV : Sequence) {
3644 if (!SeqV.isUndef())
3645 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3646 << (EltIdx * EltBitSize));
3647 EltIdx++;
3648 }
3649
3650 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3651 // achieve better constant materialization.
3652 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3653 SplatValue = SignExtend64<32>(SplatValue);
3654
3655 // Since we can't introduce illegal i64 types at this stage, we can only
3656 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3657 // way we can use RVV instructions to splat.
3658 assert((ViaIntVT.bitsLE(XLenVT) ||
3659 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3660 "Unexpected bitcast sequence");
3661 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3662 SDValue ViaVL =
3663 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3664 MVT ViaContainerVT =
3665 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3666 SDValue Splat =
3667 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3668 DAG.getUNDEF(ViaContainerVT),
3669 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3670 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3671 if (ViaVecLen != RequiredVL)
3673 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3674 DAG.getConstant(0, DL, XLenVT));
3675 return DAG.getBitcast(VT, Splat);
3676 }
3677 }
3678
3679 // If the number of signbits allows, see if we can lower as a <N x i8>.
3680 // Our main goal here is to reduce LMUL (and thus work) required to
3681 // build the constant, but we will also narrow if the resulting
3682 // narrow vector is known to materialize cheaply.
3683 // TODO: We really should be costing the smaller vector. There are
3684 // profitable cases this misses.
3685 if (EltBitSize > 8 && VT.isInteger() &&
3686 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3687 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3688 if (EltBitSize - SignBits < 8) {
3689 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3690 DL, Op->ops());
3691 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3692 Source, DAG, Subtarget);
3693 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3694 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3695 }
3696 }
3697
3698 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3699 return Res;
3700
3701 // For constant vectors, use generic constant pool lowering. Otherwise,
3702 // we'd have to materialize constants in GPRs just to move them into the
3703 // vector.
3704 return SDValue();
3705}
3706
3708 const RISCVSubtarget &Subtarget) {
3709 MVT VT = Op.getSimpleValueType();
3710 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3711
3712 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3714 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3715
3716 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3717
3718 SDLoc DL(Op);
3719 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3720
3721 MVT XLenVT = Subtarget.getXLenVT();
3722
3723 if (VT.getVectorElementType() == MVT::i1) {
3724 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3725 // vector type, we have a legal equivalently-sized i8 type, so we can use
3726 // that.
3727 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3728 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3729
3730 SDValue WideVec;
3731 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3732 // For a splat, perform a scalar truncate before creating the wider
3733 // vector.
3734 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3735 DAG.getConstant(1, DL, Splat.getValueType()));
3736 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3737 } else {
3738 SmallVector<SDValue, 8> Ops(Op->op_values());
3739 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3740 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3741 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3742 }
3743
3744 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3745 }
3746
3747 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3748 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3749 return Gather;
3750 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3752 if (!VT.isFloatingPoint())
3753 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3754 Splat =
3755 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3756 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3757 }
3758
3759 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3760 return Res;
3761
3762 // If we're compiling for an exact VLEN value, we can split our work per
3763 // register in the register group.
3764 const unsigned MinVLen = Subtarget.getRealMinVLen();
3765 const unsigned MaxVLen = Subtarget.getRealMaxVLen();
3766 if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) {
3767 MVT ElemVT = VT.getVectorElementType();
3768 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
3769 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3770 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3771 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3772 assert(M1VT == getLMUL1VT(M1VT));
3773
3774 // The following semantically builds up a fixed length concat_vector
3775 // of the component build_vectors. We eagerly lower to scalable and
3776 // insert_subvector here to avoid DAG combining it back to a large
3777 // build_vector.
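    // For example (illustrative, assuming an exact VLEN of 128), a v8i32
    // build_vector is split into two v4i32 build_vectors, each lowered to a
    // single-register scalable vector and inserted into consecutive registers
    // of the register group.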
3778 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3779 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3780 SDValue Vec = DAG.getUNDEF(ContainerVT);
3781 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3782 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3783 SDValue SubBV =
3784 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3785 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3786 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3787 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3788 DAG.getVectorIdxConstant(InsertIdx, DL));
3789 }
3790 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3791 }
3792
3793  // Cap the cost at a value linear to the number of elements in the vector.
3794  // The default lowering is to use the stack. The vector store + scalar loads
3795  // approach is linear in VL. However, at high LMULs vslide1down and
3796  // vslidedown end up being (at least) linear in LMUL. As a result, using the
3797  // slide-based lowering for every element ends up costing VL*LMUL.
3798 // TODO: Should we be directly costing the stack alternative? Doing so might
3799 // give us a more accurate upper bound.
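  // For example (illustrative), at LMUL_8 each slide below is charged 8 units
  // against a budget of two units per element, so only a few defined elements
  // fit before we give up and use the default lowering.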
3800 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3801
3802 // TODO: unify with TTI getSlideCost.
3803 InstructionCost PerSlideCost = 1;
3804  switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3805  default: break;
3806  case RISCVII::VLMUL::LMUL_2:
3807    PerSlideCost = 2;
3808    break;
3809  case RISCVII::VLMUL::LMUL_4:
3810    PerSlideCost = 4;
3811    break;
3812  case RISCVII::VLMUL::LMUL_8:
3813    PerSlideCost = 8;
3814    break;
3815  }
3816
3817 // TODO: Should we be using the build instseq then cost + evaluate scheme
3818 // we use for integer constants here?
3819 unsigned UndefCount = 0;
3820 for (const SDValue &V : Op->ops()) {
3821 if (V.isUndef()) {
3822 UndefCount++;
3823 continue;
3824 }
3825 if (UndefCount) {
3826 LinearBudget -= PerSlideCost;
3827 UndefCount = 0;
3828 }
3829 LinearBudget -= PerSlideCost;
3830 }
3831 if (UndefCount) {
3832 LinearBudget -= PerSlideCost;
3833 }
3834
3835 if (LinearBudget < 0)
3836 return SDValue();
3837
3838 assert((!VT.isFloatingPoint() ||
3839 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3840 "Illegal type which will result in reserved encoding");
3841
3842 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3843
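  // The sequence emitted below is, roughly: a tail-agnostic splat of the first
  // defined element, one vslide1down per subsequent defined element, and a
  // single vslidedown covering each run of undef elements.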
3844 SDValue Vec;
3845 UndefCount = 0;
3846 for (SDValue V : Op->ops()) {
3847 if (V.isUndef()) {
3848 UndefCount++;
3849 continue;
3850 }
3851
3852 // Start our sequence with a TA splat in the hopes that hardware is able to
3853 // recognize there's no dependency on the prior value of our temporary
3854 // register.
3855 if (!Vec) {
3856 Vec = DAG.getSplatVector(VT, DL, V);
3857 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3858 UndefCount = 0;
3859 continue;
3860 }
3861
3862 if (UndefCount) {
3863 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3864 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3865 Vec, Offset, Mask, VL, Policy);
3866 UndefCount = 0;
3867 }
3868    auto OpCode =
3869        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3870    if (!VT.isFloatingPoint())
3871 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
3872 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3873 V, Mask, VL);
3874 }
3875 if (UndefCount) {
3876 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3877 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3878 Vec, Offset, Mask, VL, Policy);
3879 }
3880 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3881}
3882
3883static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3884                                   SDValue Lo, SDValue Hi, SDValue VL,
3885                                   SelectionDAG &DAG) {
3886 if (!Passthru)
3887 Passthru = DAG.getUNDEF(VT);
3888 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3889 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3890 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3891 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3892 // node in order to try and match RVV vector/scalar instructions.
3893 if ((LoC >> 31) == HiC)
3894 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3895
3896 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3897 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3898 // vlmax vsetvli or vsetivli to change the VL.
3899 // FIXME: Support larger constants?
3900 // FIXME: Support non-constant VLs by saturating?
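    // For example (illustrative), splatting the i64 value 0xAAAAAAAAAAAAAAAA
    // on RV32 can instead splat the i32 value 0xAAAAAAAA into a vector with
    // twice the element count and bitcast the result back to the i64 type.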
3901 if (LoC == HiC) {
3902 SDValue NewVL;
3903 if (isAllOnesConstant(VL) ||
3904 (isa<RegisterSDNode>(VL) &&
3905 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3906 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3907 else if (isa<ConstantSDNode>(VL) &&
3908 isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
3909 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3910
3911 if (NewVL) {
3912 MVT InterVT =
3913 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3914 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3915 DAG.getUNDEF(InterVT), Lo,
3916 DAG.getRegister(RISCV::X0, MVT::i32));
3917 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3918 }
3919 }
3920 }
3921
3922 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3923 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3924 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3925 Hi.getConstantOperandVal(1) == 31)
3926 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3927
3928 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3929 // even if it might be sign extended.
3930 if (Hi.isUndef())
3931 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3932
3933 // Fall back to a stack store and stride x0 vector load.
3934 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3935 Hi, VL);
3936}
3937
3938// Called by type legalization to handle splat of i64 on RV32.
3939// FIXME: We can optimize this when the type has sign or zero bits in one
3940// of the halves.
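// For example (illustrative), an i64 scalar 0x1111111122222222 is split into
// Lo = 0x22222222 and Hi = 0x11111111, which are then splatted part-wise by
// splatPartsI64WithVL.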
3941static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3942 SDValue Scalar, SDValue VL,
3943 SelectionDAG &DAG) {
3944 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3945 SDValue Lo, Hi;
3946 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3947 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3948}
3949
3950// This function lowers a splat of a scalar operand Scalar with the vector
3951// length VL. It ensures the final sequence is type legal, which is useful when
3952// lowering a splat after type legalization.
3953static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3954 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3955 const RISCVSubtarget &Subtarget) {
3956 bool HasPassthru = Passthru && !Passthru.isUndef();
3957 if (!HasPassthru && !Passthru)
3958 Passthru = DAG.getUNDEF(VT);
3959 if (VT.isFloatingPoint())
3960 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3961
3962 MVT XLenVT = Subtarget.getXLenVT();
3963
3964  // The simplest case: the operand just needs to be promoted to XLenVT.
3965 if (Scalar.getValueType().bitsLE(XLenVT)) {
3966 // If the operand is a constant, sign extend to increase our chances
3967 // of being able to use a .vi instruction. ANY_EXTEND would become a
3968    // zero extend and the simm5 check in isel would fail.
3969 // FIXME: Should we ignore the upper bits in isel instead?
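    // For example (illustrative), splatting the constant -3 sign extends it to
    // XLenVT so isel can still match the simm5 form and select vmv.v.i.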
3970 unsigned ExtOpc =
3971 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3972 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3973 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3974 }
3975
3976 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3977 "Unexpected scalar for splat lowering!");
3978
3979 if (isOneConstant(VL) && isNullConstant(Scalar))
3980 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3981 DAG.getConstant(0, DL, XLenVT), VL);
3982
3983 // Otherwise use the more complicated splatting algorithm.
3984 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3985}
3986
3987// This function lowers an insert of a scalar operand Scalar into lane
3988// 0 of the vector regardless of the value of VL. The contents of the
3989// remaining lanes of the result vector are unspecified. VL is assumed
3990// to be non-zero.
3991static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3992                                 const SDLoc &DL, SelectionDAG &DAG,
3993 const RISCVSubtarget &Subtarget) {
3994 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
3995
3996 const MVT XLenVT = Subtarget.getXLenVT();
3997 SDValue Passthru = DAG.getUNDEF(VT);
3998
3999 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4000 isNullConstant(Scalar.getOperand(1))) {
4001 SDValue ExtractedVal = Scalar.getOperand(0);
4002 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4003 MVT ExtractedContainerVT = ExtractedVT;
4004 if (ExtractedContainerVT.isFixedLengthVector()) {
4005 ExtractedContainerVT = getContainerForFixedLengthVector(
4006 DAG, ExtractedContainerVT, Subtarget);
4007 ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
4008 DAG, Subtarget);
4009 }
4010 if (ExtractedContainerVT.bitsLE(VT))
4011 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
4012 DAG.getConstant(0, DL, XLenVT));
4013 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4014 DAG.getConstant(0, DL, XLenVT));
4015 }
4016
4017
4018 if (VT.isFloatingPoint())
4019 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4020 DAG.getUNDEF(VT), Scalar, VL);
4021
4022 // Avoid the tricky legalization cases by falling back to using the
4023 // splat code which already handles it gracefully.
4024 if (!Scalar.getValueType().bitsLE(XLenVT))
4025 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4026 DAG.getConstant(1, DL, XLenVT),
4027 VT, DL, DAG, Subtarget);
4028
4029 // If the operand is a constant, sign extend to increase our chances
4030 // of being able to use a .vi instruction. ANY_EXTEND would become a
4031  // zero extend and the simm5 check in isel would fail.
4032 // FIXME: Should we ignore the upper bits in isel instead?
4033 unsigned ExtOpc =
4034 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4035 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4036 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4037 DAG.getUNDEF(VT), Scalar, VL);
4038}
4039
4040// Is this a shuffle that extracts either the even or the odd elements of a
4041// That is, specifically, either (a) or (b) below.
4042// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4043// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4044// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4045// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4046// vector? Returns true if the shuffle matches one of these forms.
4047static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4048 SDValue V2, ArrayRef<int> Mask,
4049 const RISCVSubtarget &Subtarget) {
4050  // Need to be able to widen the element type to the next larger integer type.
4051 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4052 return false;
4053
4054  // Both inputs must be extracts.
4055 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4056 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4057 return false;
4058
4059 // Extracting from the same source.
4060 SDValue Src = V1.getOperand(0);
4061 if (Src != V2.getOperand(0))
4062 return false;
4063
4064 // Src needs to have twice the number of elements.
4065 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4066 return false;
4067
4068 // The extracts must extract the two halves of the source.
4069 if (V1.getConstantOperandVal(1) != 0 ||
4070 V2.getConstantOperandVal(1) != Mask.size())
4071 return false;
4072
4073 // First index must be the first even or odd element from V1.
4074 if (Mask[0] != 0 && Mask[0] != 1)
4075 return false;
4076
4077 // The others must increase by 2 each time.
4078 // TODO: Support undef elements?
4079 for (unsigned i = 1; i != Mask.size(); ++i)
4080 if (Mask[i] != Mask[i - 1] + 2)
4081 return false;
4082
4083 return true;
4084}
4085
4086/// Is this a shuffle that interleaves contiguous elements from one vector into
4087/// the even elements and contiguous elements from another vector into the odd
4088/// elements? \p EvenSrc will contain the index of the element that should end
4089/// up in the first even element. \p OddSrc will contain the index of the
4090/// element that should end up in the first odd element. These can be the first
4091/// element in a source or the element halfway through the source.
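/// For example (illustrative), with 8 result elements the mask
/// <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of the two sources
/// and yields EvenSrc == 0 and OddSrc == 8.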
4092static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4093 int &OddSrc, const RISCVSubtarget &Subtarget) {
4094 // We need to be able to widen elements to the next larger integer type.
4095 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4096 return false;
4097
4098 int Size = Mask.size();
4099 int NumElts = VT.getVectorNumElements();
4100 assert(Size == (int)NumElts && "Unexpected mask size");
4101
4102 SmallVector<unsigned, 2> StartIndexes;
4103 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4104 return false;
4105
4106 EvenSrc = StartIndexes[0];
4107 OddSrc = StartIndexes[1];
4108
4109  // One source should be the low half of the first vector.
4110 if (EvenSrc != 0 && OddSrc != 0)
4111 return false;
4112
4113  // Subvectors will be extracted from either the start of the two input
4114  // vectors, or from the start and middle of the first vector if it's a
4115  // unary interleave.
4116  // In both cases, HalfNumElts elements will be extracted.
4117 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4118 // we'll create an illegal extract_subvector.
4119 // FIXME: We could support other values using a slidedown first.
4120 int HalfNumElts = NumElts / 2;
4121 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4122}
4123
4124/// Match shuffles that concatenate two vectors, rotate the concatenation,
4125/// and then extract the original number of elements from the rotated result.
4126/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4127/// returned rotation amount is for a rotate right, where elements move from
4128/// higher elements to lower elements. \p LoSrc indicates the first source
4129/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4130/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4131/// 0 or 1 if a rotation is found.
4132///
4133/// NOTE: We talk about rotate to the right which matches how bit shift and
4134/// rotate instructions are described where LSBs are on the right, but LLVM IR
4135/// and the table below write vectors with the lowest elements on the left.
4136static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4137 int Size = Mask.size();
4138
4139 // We need to detect various ways of spelling a rotation:
4140 // [11, 12, 13, 14, 15, 0, 1, 2]
4141 // [-1, 12, 13, 14, -1, -1, 1, -1]
4142 // [-1, -1, -1, -1, -1, -1, 1, 2]
4143 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4144 // [-1, 4, 5, 6, -1, -1, 9, -1]
4145 // [-1, 4, 5, 6, -1, -1, -1, -1]
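  // For example (illustrative), with Size == 8 the first mask above yields a
  // rotation of 3 with LoSrc == 0 and HiSrc == 1.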
4146 int Rotation = 0;
4147 LoSrc = -1;
4148 HiSrc = -1;
4149 for (int i = 0; i != Size; ++i) {
4150 int M = Mask[i];
4151 if (M < 0)
4152 continue;
4153
4154 // Determine where a rotate vector would have started.
4155 int StartIdx = i - (M % Size);
4156 // The identity rotation isn't interesting, stop.
4157 if (StartIdx == 0)
4158 return -1;
4159
4160 // If we found the tail of a vector the rotation must be the missing
4161 // front. If we found the head of a vector, it must be how much of the
4162 // head.
4163 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4164
4165 if (Rotation == 0)
4166 Rotation = CandidateRotation;
4167 else if (Rotation != CandidateRotation)
4168 // The rotations don't match, so we can't match this mask.
4169 return -1;
4170
4171 // Compute which value this mask is pointing at.
4172 int MaskSrc = M < Size ? 0 : 1;
4173
4174 // Compute which of the two target values this index should be assigned to.
4175    // This reflects whether the high elements are remaining or the low elements
4176 // are remaining.
4177 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4178
4179 // Either set up this value if we've not encountered it before, or check
4180 // that it remains consistent.
4181 if (TargetSrc < 0)
4182 TargetSrc = MaskSrc;
4183 else if (TargetSrc != MaskSrc)
4184 // This may be a rotation, but it pulls from the inputs in some
4185 // unsupported interleaving.
4186 return -1;
4187 }
4188
4189 // Check that we successfully analyzed the mask, and normalize the results.
4190 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4191 assert((LoSrc >= 0 || HiSrc >= 0) &&
4192 "Failed to find a rotated input vector!");
4193
4194 return Rotation;
4195}
4196
4197// Lower a deinterleave shuffle to vnsrl.
4198// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4199// -> [p, q, r, s] (EvenElts == false)
4200// VT is the type of the vector to return, <[vscale x ]n x ty>
4201// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
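// Roughly speaking (illustrative), the even elements are produced by a
// narrowing right shift by zero and the odd elements by a shift equal to the
// original element width.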
4202static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4203                                       bool EvenElts,
4204 const RISCVSubtarget &Subtarget,
4205 SelectionDAG &DAG) {
4206 // The result is a vector of type <m x n x ty>
4207 MVT ContainerVT = VT;
4208 // Convert fixed vectors to scalable if needed
4209 if (ContainerVT.isFixedLengthVector()) {
4210 assert(Src.getSimpleValueType().isFixedLengthVector());
4211 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4212
4213