1//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISCV uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
33#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/IntrinsicsRISCV.h"
38#include "llvm/Support/Debug.h"
43#include <optional>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "riscv-lower"
48
49STATISTIC(NumTailCalls, "Number of tail calls");
50
52 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
53 cl::desc("Give the maximum size (in number of nodes) of the web of "
54 "instructions that we will consider for VW expansion"),
55 cl::init(18));
56
57static cl::opt<bool>
58 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
59 cl::desc("Allow the formation of VW_W operations (e.g., "
60 "VWADD_W) with splat constants"),
61 cl::init(false));
62
64 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
65 cl::desc("Set the minimum number of repetitions of a divisor to allow "
66 "transformation to multiplications by the reciprocal"),
67 cl::init(2));
68
69static cl::opt<int>
71 cl::desc("Give the maximum number of instructions that we will "
72 "use for creating a floating-point immediate value"),
73 cl::init(2));
74
76 const RISCVSubtarget &STI)
77 : TargetLowering(TM), Subtarget(STI) {
78
79 if (Subtarget.isRVE())
80 report_fatal_error("Codegen not yet implemented for RVE");
81
82 RISCVABI::ABI ABI = Subtarget.getTargetABI();
83 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
84
85 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
86 !Subtarget.hasStdExtF()) {
87 errs() << "Hard-float 'f' ABI can't be used for a target that "
88 "doesn't support the F instruction set extension (ignoring "
89 "target-abi)\n";
91 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
92 !Subtarget.hasStdExtD()) {
93 errs() << "Hard-float 'd' ABI can't be used for a target that "
94 "doesn't support the D instruction set extension (ignoring "
95 "target-abi)\n";
97 }
98
99 switch (ABI) {
100 default:
101 report_fatal_error("Don't know how to lower this ABI");
108 break;
109 }
110
111 MVT XLenVT = Subtarget.getXLenVT();
112
113 // Set up the register classes.
114 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
115
116 if (Subtarget.hasStdExtZfhOrZfhmin())
117 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
118 if (Subtarget.hasStdExtF())
119 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
120 if (Subtarget.hasStdExtD())
121 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
122
123 static const MVT::SimpleValueType BoolVecVTs[] = {
126 static const MVT::SimpleValueType IntVecVTs[] = {
132 static const MVT::SimpleValueType F16VecVTs[] = {
135 static const MVT::SimpleValueType F32VecVTs[] = {
137 static const MVT::SimpleValueType F64VecVTs[] = {
139
140 if (Subtarget.hasVInstructions()) {
141 auto addRegClassForRVV = [this](MVT VT) {
142 // Disable the smallest fractional LMUL types if ELEN is less than
143 // RVVBitsPerBlock.
144 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
145 if (VT.getVectorMinNumElements() < MinElts)
146 return;
147
148 unsigned Size = VT.getSizeInBits().getKnownMinValue();
149 const TargetRegisterClass *RC;
151 RC = &RISCV::VRRegClass;
152 else if (Size == 2 * RISCV::RVVBitsPerBlock)
153 RC = &RISCV::VRM2RegClass;
154 else if (Size == 4 * RISCV::RVVBitsPerBlock)
155 RC = &RISCV::VRM4RegClass;
156 else if (Size == 8 * RISCV::RVVBitsPerBlock)
157 RC = &RISCV::VRM8RegClass;
158 else
159 llvm_unreachable("Unexpected size");
160
161 addRegisterClass(VT, RC);
162 };
163
164 for (MVT VT : BoolVecVTs)
165 addRegClassForRVV(VT);
166 for (MVT VT : IntVecVTs) {
167 if (VT.getVectorElementType() == MVT::i64 &&
168 !Subtarget.hasVInstructionsI64())
169 continue;
170 addRegClassForRVV(VT);
171 }
172
173 if (Subtarget.hasVInstructionsF16())
174 for (MVT VT : F16VecVTs)
175 addRegClassForRVV(VT);
176
177 if (Subtarget.hasVInstructionsF32())
178 for (MVT VT : F32VecVTs)
179 addRegClassForRVV(VT);
180
181 if (Subtarget.hasVInstructionsF64())
182 for (MVT VT : F64VecVTs)
183 addRegClassForRVV(VT);
184
185 if (Subtarget.useRVVForFixedLengthVectors()) {
186 auto addRegClassForFixedVectors = [this](MVT VT) {
187 MVT ContainerVT = getContainerForFixedLengthVector(VT);
188 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
189 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
190 addRegisterClass(VT, TRI.getRegClass(RCID));
191 };
193 if (useRVVForFixedLengthVectorVT(VT))
194 addRegClassForFixedVectors(VT);
195
197 if (useRVVForFixedLengthVectorVT(VT))
198 addRegClassForFixedVectors(VT);
199 }
200 }
201
202 // Compute derived properties from the register classes.
204
206
209 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
212
213 // TODO: add all necessary setOperationAction calls.
215
220
227
229
232
234
236
237 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
239
240 if (Subtarget.is64Bit()) {
242
244
247
250 } else {
252 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
253 nullptr);
254 setLibcallName(RTLIB::MULO_I64, nullptr);
255 }
256
257 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
259 } else {
260 if (Subtarget.is64Bit()) {
262 } else {
264 }
265 }
266
267 if (!Subtarget.hasStdExtM()) {
269 XLenVT, Expand);
270 } else {
271 if (Subtarget.is64Bit()) {
274 }
275 }
276
279 Expand);
280
282 Custom);
283
284 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
285 Subtarget.hasVendorXTHeadBb()) {
286 if (Subtarget.is64Bit())
288 } else {
290 }
291
292 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
293 // pattern match it directly in isel.
295 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
296 Subtarget.hasVendorXTHeadBb())
297 ? Legal
298 : Expand);
299 // Zbkb can use rev8+brev8 to implement bitreverse.
301 Subtarget.hasStdExtZbkb() ? Custom : Expand);
302
303 if (Subtarget.hasStdExtZbb()) {
305 Legal);
306
307 if (Subtarget.is64Bit())
311 } else {
313 }
314
315 if (Subtarget.hasVendorXTHeadBb()) {
317
318 // We need the custom lowering to make sure that the resulting sequence
319 // for the 32-bit case is efficient on 64-bit targets.
320 if (Subtarget.is64Bit())
322 }
323
324 if (Subtarget.is64Bit())
326
327 if (!Subtarget.hasVendorXVentanaCondOps() &&
328 !Subtarget.hasVendorXTHeadCondMov())
330
331 static const unsigned FPLegalNodeTypes[] = {
338
339 static const ISD::CondCode FPCCToExpand[] = {
343
344 static const unsigned FPOpToExpand[] = {
347
348 static const unsigned FPRndMode[] = {
351
352 if (Subtarget.hasStdExtZfhOrZfhmin())
354
355 if (Subtarget.hasStdExtZfhOrZfhmin()) {
356 if (Subtarget.hasStdExtZfh()) {
357 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
358 setOperationAction(FPRndMode, MVT::f16,
359 Subtarget.hasStdExtZfa() ? Legal : Custom);
361 } else {
362 static const unsigned ZfhminPromoteOps[] = {
372
373 setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
376 MVT::f16, Legal);
377 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
378 // DAGCombiner::visitFP_ROUND probably needs improvements first.
380 }
381
384 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
387
389 Subtarget.hasStdExtZfa() ? Legal : Promote);
394
395 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
396 // complete support for all operations in LegalizeDAG.
402
403 // We need to custom promote this.
404 if (Subtarget.is64Bit())
406 }
407
408 if (Subtarget.hasStdExtF()) {
409 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
410 setOperationAction(FPRndMode, MVT::f32,
411 Subtarget.hasStdExtZfa() ? Legal : Custom);
412 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
416 setOperationAction(FPOpToExpand, MVT::f32, Expand);
419
420 if (Subtarget.hasStdExtZfa())
422 }
423
424 if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
426
427 if (Subtarget.hasStdExtD()) {
428 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
429
430 if (Subtarget.hasStdExtZfa()) {
431 setOperationAction(FPRndMode, MVT::f64, Legal);
435 }
436
437 if (Subtarget.is64Bit())
438 setOperationAction(FPRndMode, MVT::f64,
439 Subtarget.hasStdExtZfa() ? Legal : Custom);
440
443 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
449 setOperationAction(FPOpToExpand, MVT::f64, Expand);
452 }
453
454 if (Subtarget.is64Bit())
458
459 if (Subtarget.hasStdExtF()) {
461 Custom);
462
465 XLenVT, Legal);
466
469 }
470
473 XLenVT, Custom);
474
476
477 if (Subtarget.is64Bit())
479
480 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
481 // Unfortunately this can't be determined just from the ISA naming string.
483 Subtarget.is64Bit() ? Legal : Custom);
484
487 if (Subtarget.is64Bit())
489
490 if (Subtarget.hasStdExtA()) {
493 } else if (Subtarget.hasForcedAtomics()) {
495 } else {
497 }
498
500
502
503 if (Subtarget.hasVInstructions()) {
505
507
508 // RVV intrinsics may have illegal operands.
509 // We also need to custom legalize vmv.x.s.
512 if (Subtarget.is64Bit())
514 else
517
520
521 static const unsigned IntegerVPOps[] = {
522 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
523 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
524 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
525 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
526 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
527 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
528 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
529 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
530 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
531 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
532 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
533 ISD::VP_ABS};
534
535 static const unsigned FloatingPointVPOps[] = {
536 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
537 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
538 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
539 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
540 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
541 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
542 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
543 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
544 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
545 ISD::VP_FRINT, ISD::VP_FNEARBYINT};
546
547 static const unsigned IntegerVecReduceOps[] = {
551
552 static const unsigned FloatingPointVecReduceOps[] = {
555
556 if (!Subtarget.is64Bit()) {
557 // We must custom-lower certain vXi64 operations on RV32 due to the vector
558 // element type being illegal.
561
562 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
563
564 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
565 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
566 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
567 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
569 }
570
571 for (MVT VT : BoolVecVTs) {
572 if (!isTypeLegal(VT))
573 continue;
574
576
577 // Mask VTs are custom-expanded into a series of standard nodes
580 VT, Custom);
581
583 Custom);
584
587 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
588 Expand);
589
590 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
591
594 Custom);
595
597 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
598 Custom);
599
600 // RVV has native int->float & float->int conversions where the
601 // element type sizes are within one power-of-two of each other. Any
602 // wider distances between type sizes have to be lowered as sequences
603 // which progressively narrow the gap in stages.
606 VT, Custom);
608 Custom);
609
610 // Expand all extending loads to types larger than this, and truncating
611 // stores from types larger than this.
613 setTruncStoreAction(OtherVT, VT, Expand);
615 VT, Expand);
616 }
617
618 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
619 ISD::VP_TRUNCATE, ISD::VP_SETCC},
620 VT, Custom);
621
624
626
629 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
630 }
631
632 for (MVT VT : IntVecVTs) {
633 if (!isTypeLegal(VT))
634 continue;
635
638
639 // Vectors implement MULHS/MULHU.
641
642 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
643 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
645
647 Legal);
648
650
652
654 setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand);
655 setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
656 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
657 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
658 VT, Expand);
659
660 // Custom-lower extensions and truncations from/to mask types.
662 VT, Custom);
663
664 // RVV has native int->float & float->int conversions where the
665 // element type sizes are within one power-of-two of each other. Any
666 // wider distances between type sizes have to be lowered as sequences
667 // which progressively narrow the gap in stages.
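    // For example, a conversion such as i8 -> f64 (element sizes more than one
    // power of two apart) is not a single instruction; it is staged, e.g. by
    // first extending i8 to i32 and then performing a widening i32 -> f64
    // convert, with the exact intermediate type chosen by the custom lowering.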
670 VT, Custom);
672 Custom);
673
676
677 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
678 // nodes which truncate by one power of two at a time.
680
681 // Custom-lower insert/extract operations to simplify patterns.
683 Custom);
684
685 // Custom-lower reduction operations to set up the corresponding custom
686 // nodes' operands.
687 setOperationAction(IntegerVecReduceOps, VT, Custom);
688
689 setOperationAction(IntegerVPOps, VT, Custom);
690
692
694 VT, Custom);
695
697 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
698 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
699 VT, Custom);
700
703 VT, Custom);
704
707
709
711 setTruncStoreAction(VT, OtherVT, Expand);
713 VT, Expand);
714 }
715
718
719 // Splice
721
722 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
723 // in the range of f32.
724 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
725 if (isTypeLegal(FloatVT)) {
728 Custom);
729 }
730 }
731
732 // Expand various CCs to best match the RVV ISA, which natively supports UNE
733 // but no other unordered comparisons, and supports all ordered comparisons
734 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
735 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
736 // and we pattern-match those back to the "original", swapping operands once
737 // more. This way we catch both operations and both "vf" and "fv" forms with
738 // fewer patterns.
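    // For example, a SETOGT of (x, y) is expanded to a SETOLT of (y, x); with
    // only the LT/LE patterns, isel still covers vmflt.vv for the
    // vector-vector form and, when one operand is a scalar splat, both
    // vmflt.vf and vmfgt.vf.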
739 static const ISD::CondCode VFPCCToExpand[] = {
743 };
744
745 // Sets common operation actions on RVV floating-point vector types.
746 const auto SetCommonVFPActions = [&](MVT VT) {
748 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
749 // sizes are within one power-of-two of each other. Therefore conversions
750 // between vXf16 and vXf64 must be lowered as sequences which convert via
751 // vXf32.
753 // Custom-lower insert/extract operations to simplify patterns.
755 Custom);
756 // Expand various condition codes (explained above).
757 setCondCodeAction(VFPCCToExpand, VT, Expand);
758
760
763 VT, Custom);
764
765 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
766
767 // Expand FP operations that need libcalls.
780
782
784
786 VT, Custom);
787
789 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
790 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
791 VT, Custom);
792
795
798 VT, Custom);
799
802
804
805 setOperationAction(FloatingPointVPOps, VT, Custom);
806
810 VT, Legal);
811 };
812
813 // Sets common extload/truncstore actions on RVV floating-point vector
814 // types.
815 const auto SetCommonVFPExtLoadTruncStoreActions =
816 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
817 for (auto SmallVT : SmallerVTs) {
818 setTruncStoreAction(VT, SmallVT, Expand);
819 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
820 }
821 };
822
823 if (Subtarget.hasVInstructionsF16()) {
824 for (MVT VT : F16VecVTs) {
825 if (!isTypeLegal(VT))
826 continue;
827 SetCommonVFPActions(VT);
828 }
829 }
830
831 if (Subtarget.hasVInstructionsF32()) {
832 for (MVT VT : F32VecVTs) {
833 if (!isTypeLegal(VT))
834 continue;
835 SetCommonVFPActions(VT);
836 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
837 }
838 }
839
840 if (Subtarget.hasVInstructionsF64()) {
841 for (MVT VT : F64VecVTs) {
842 if (!isTypeLegal(VT))
843 continue;
844 SetCommonVFPActions(VT);
845 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
846 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
847 }
848 }
849
850 if (Subtarget.useRVVForFixedLengthVectors()) {
852 if (!useRVVForFixedLengthVectorVT(VT))
853 continue;
854
855 // By default everything must be expanded.
856 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
857 setOperationAction(Op, VT, Expand);
859 setTruncStoreAction(VT, OtherVT, Expand);
861 OtherVT, VT, Expand);
862 }
863
864 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
866 Custom);
867
869 Custom);
870
872 VT, Custom);
873
875
877
879
881
883
886 Custom);
887
889 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
890 Custom);
891
894 VT, Custom);
896 Custom);
897
898 // The operations below differ between masks and other vectors.
899 if (VT.getVectorElementType() == MVT::i1) {
900 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
902 VT, Custom);
903
904 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
905 ISD::VP_SETCC, ISD::VP_TRUNCATE},
906 VT, Custom);
907 continue;
908 }
909
910 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
911 // it before type legalization for i64 vectors on RV32. It will then be
912 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
913 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
914 // improvements first.
915 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
918 }
919
921
924
925 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
926 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
927 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
928 ISD::VP_SCATTER},
929 VT, Custom);
930
934 VT, Custom);
935
938
939 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
940 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
942
945 Custom);
946
949
952
953 // Custom-lower reduction operations to set up the corresponding custom
954 // nodes' operands.
958 VT, Custom);
959
960 setOperationAction(IntegerVPOps, VT, Custom);
961
962 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
963 // in the range of f32.
965 if (isTypeLegal(FloatVT))
968 Custom);
969 }
970
972 if (!useRVVForFixedLengthVectorVT(VT))
973 continue;
974
975 // By default everything must be expanded.
976 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
977 setOperationAction(Op, VT, Expand);
978 for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
979 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
980 setTruncStoreAction(VT, OtherVT, Expand);
981 }
982
983 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
985 Custom);
986
990 VT, Custom);
991
994 VT, Custom);
995
996 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
997 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
998 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
999 ISD::VP_SCATTER},
1000 VT, Custom);
1001
1005 VT, Custom);
1006
1008
1011 VT, Custom);
1012
1013 setCondCodeAction(VFPCCToExpand, VT, Expand);
1014
1017
1019
1020 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1021
1022 setOperationAction(FloatingPointVPOps, VT, Custom);
1023
1027 VT, Custom);
1028 }
1029
1030 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1032 Custom);
1033 if (Subtarget.hasStdExtZfhOrZfhmin())
1035 if (Subtarget.hasStdExtF())
1037 if (Subtarget.hasStdExtD())
1039 }
1040 }
1041
1042 if (Subtarget.hasForcedAtomics()) {
1043 // Set atomic rmw/cas operations to expand to force __sync libcalls.
1049 XLenVT, Expand);
1050 }
1051
1052 if (Subtarget.hasVendorXTHeadMemIdx()) {
1053 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1054 ++im) {
1061
1062 if (Subtarget.is64Bit()) {
1065 }
1066 }
1067 }
1068
1069 // Function alignments.
1070 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1071 setMinFunctionAlignment(FunctionAlignment);
1072 // Set preferred alignments.
1075
1077
1078 // Jumps are expensive, compared to logic
1080
1083 if (Subtarget.is64Bit())
1085
1086 if (Subtarget.hasStdExtF())
1088
1089 if (Subtarget.hasStdExtZbb())
1091
1092 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1094
1095 if (Subtarget.hasStdExtZbkb())
1097 if (Subtarget.hasStdExtZfhOrZfhmin())
1099 if (Subtarget.hasStdExtF())
1102 if (Subtarget.hasVInstructions())
1104 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1106 if (Subtarget.hasVendorXTHeadMemPair())
1108 if (Subtarget.useRVVForFixedLengthVectors())
1110
1111 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1112 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1113}
1114
1116 LLVMContext &Context,
1117 EVT VT) const {
1118 if (!VT.isVector())
1119 return getPointerTy(DL);
1120 if (Subtarget.hasVInstructions() &&
1121 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1124}
1125
1126MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1127 return Subtarget.getXLenVT();
1128}
1129
1131 const CallInst &I,
1132 MachineFunction &MF,
1133 unsigned Intrinsic) const {
1134 auto &DL = I.getModule()->getDataLayout();
1135 switch (Intrinsic) {
1136 default:
1137 return false;
1138 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1139 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1140 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1141 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1142 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1143 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1144 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1145 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1146 case Intrinsic::riscv_masked_cmpxchg_i32:
1148 Info.memVT = MVT::i32;
1149 Info.ptrVal = I.getArgOperand(0);
1150 Info.offset = 0;
1151 Info.align = Align(4);
1154 return true;
1155 case Intrinsic::riscv_masked_strided_load:
1157 Info.ptrVal = I.getArgOperand(1);
1158 Info.memVT = getValueType(DL, I.getType()->getScalarType());
1159 Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1162 return true;
1163 case Intrinsic::riscv_masked_strided_store:
1165 Info.ptrVal = I.getArgOperand(1);
1166 Info.memVT =
1167 getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1168 Info.align = Align(
1169 DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1170 8);
1173 return true;
1174 case Intrinsic::riscv_seg2_load:
1175 case Intrinsic::riscv_seg3_load:
1176 case Intrinsic::riscv_seg4_load:
1177 case Intrinsic::riscv_seg5_load:
1178 case Intrinsic::riscv_seg6_load:
1179 case Intrinsic::riscv_seg7_load:
1180 case Intrinsic::riscv_seg8_load:
1182 Info.ptrVal = I.getArgOperand(0);
1183 Info.memVT =
1184 getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
1185 Info.align =
1186 Align(DL.getTypeSizeInBits(
1187 I.getType()->getStructElementType(0)->getScalarType()) /
1188 8);
1191 return true;
1192 case Intrinsic::riscv_seg2_store:
1193 case Intrinsic::riscv_seg3_store:
1194 case Intrinsic::riscv_seg4_store:
1195 case Intrinsic::riscv_seg5_store:
1196 case Intrinsic::riscv_seg6_store:
1197 case Intrinsic::riscv_seg7_store:
1198 case Intrinsic::riscv_seg8_store:
1200 // Operands are (vec, ..., vec, ptr, vl, int_id)
1201 Info.ptrVal = I.getArgOperand(I.getNumOperands() - 3);
1202 Info.memVT =
1203 getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1204 Info.align = Align(
1205 DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1206 8);
1209 return true;
1210 }
1211}
1212
1214 const AddrMode &AM, Type *Ty,
1215 unsigned AS,
1216 Instruction *I) const {
1217 // No global is ever allowed as a base.
1218 if (AM.BaseGV)
1219 return false;
1220
1221 // RVV instructions only support register addressing.
1222 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1223 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1224
1225 // Require a 12-bit signed offset.
1226 if (!isInt<12>(AM.BaseOffs))
1227 return false;
1228
1229 switch (AM.Scale) {
1230 case 0: // "r+i" or just "i", depending on HasBaseReg.
1231 break;
1232 case 1:
1233 if (!AM.HasBaseReg) // allow "r+i".
1234 break;
1235 return false; // disallow "r+r" or "r+r+i".
1236 default:
1237 return false;
1238 }
1239
1240 return true;
1241}
1242
1244 return isInt<12>(Imm);
1245}
1246
1248 return isInt<12>(Imm);
1249}
1250
1251// On RV32, 64-bit integers are split into their high and low parts and held
1252// in two different registers, so the trunc is free since the low register can
1253// just be used.
1254// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1255// isTruncateFree?
1257 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1258 return false;
1259 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1260 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1261 return (SrcBits == 64 && DestBits == 32);
1262}
1263
1265 // We consider i64->i32 free on RV64 since we have good selection of W
1266 // instructions that make promoting operations back to i64 free in many cases.
1267 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1268 !DstVT.isInteger())
1269 return false;
1270 unsigned SrcBits = SrcVT.getSizeInBits();
1271 unsigned DestBits = DstVT.getSizeInBits();
1272 return (SrcBits == 64 && DestBits == 32);
1273}
1274
1276 // Zexts are free if they can be combined with a load.
1277 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1278 // poorly with type legalization of compares preferring sext.
1279 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1280 EVT MemVT = LD->getMemoryVT();
1281 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1282 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1283 LD->getExtensionType() == ISD::ZEXTLOAD))
1284 return true;
1285 }
1286
1287 return TargetLowering::isZExtFree(Val, VT2);
1288}
1289
1291 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1292}
1293
1295 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1296}
1297
1299 return Subtarget.hasStdExtZbb();
1300}
1301
1303 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1304}
1305
1307 const Instruction &AndI) const {
1308 // We expect to be able to match a bit extraction instruction if the Zbs
1309 // extension is supported and the mask is a power of two. However, we
1310 // conservatively return false if the mask would fit in an ANDI instruction,
1311 // on the basis that it's possible the sinking+duplication of the AND in
1312 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1313 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1314 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1315 return false;
1316 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1317 if (!Mask)
1318 return false;
1319 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1320}
1321
1323 EVT VT = Y.getValueType();
1324
1325 // FIXME: Support vectors once we have tests.
1326 if (VT.isVector())
1327 return false;
1328
1329 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1330 !isa<ConstantSDNode>(Y);
1331}
1332
1334 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1335 if (Subtarget.hasStdExtZbs())
1336 return X.getValueType().isScalarInteger();
1337 auto *C = dyn_cast<ConstantSDNode>(Y);
1338 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1339 if (Subtarget.hasVendorXTHeadBs())
1340 return C != nullptr;
1341 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1342 return C && C->getAPIntValue().ule(10);
1343}
1344
1346 EVT VT) const {
1347 // Only enable for rvv.
1348 if (!VT.isVector() || !Subtarget.hasVInstructions())
1349 return false;
1350
1351 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1352 return false;
1353
1354 return true;
1355}
1356
1358 Type *Ty) const {
1359 assert(Ty->isIntegerTy());
1360
1361 unsigned BitSize = Ty->getIntegerBitWidth();
1362 if (BitSize > Subtarget.getXLen())
1363 return false;
1364
1365 // Fast path, assume 32-bit immediates are cheap.
1366 int64_t Val = Imm.getSExtValue();
1367 if (isInt<32>(Val))
1368 return true;
1369
1370 // A constant pool entry may be more aligned than the load we're trying to
1371 // replace. If we don't support unaligned scalar mem, prefer the constant
1372 // pool.
1373 // TODO: Can the caller pass down the alignment?
1374 if (!Subtarget.enableUnalignedScalarMem())
1375 return true;
1376
1377 // Prefer to keep the load if it would require many instructions.
1378 // This uses the same threshold we use for constant pools but doesn't
1379 // check useConstantPoolForLargeInts.
1380 // TODO: Should we keep the load only when we're definitely going to emit a
1381 // constant pool?
1382
1384 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1385 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1386}
1387
1391 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1392 SelectionDAG &DAG) const {
1393 // One interesting pattern that we'd want to form is 'bit extract':
1394 // ((1 >> Y) & 1) ==/!= 0
1395 // But we also need to be careful not to try to reverse that fold.
1396
1397 // Is this '((1 >> Y) & 1)'?
1398 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1399 return false; // Keep the 'bit extract' pattern.
1400
1401 // Will this be '((1 >> Y) & 1)' after the transform?
1402 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1403 return true; // Do form the 'bit extract' pattern.
1404
1405 // If 'X' is a constant, and we transform, then we will immediately
1406 // try to undo the fold, thus causing endless combine loop.
1407 // So only do the transform if X is not a constant. This matches the default
1408 // implementation of this function.
1409 return !XC;
1410}
1411
1412bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1413 switch (Opcode) {
1414 case Instruction::Add:
1415 case Instruction::Sub:
1416 case Instruction::Mul:
1417 case Instruction::And:
1418 case Instruction::Or:
1419 case Instruction::Xor:
1420 case Instruction::FAdd:
1421 case Instruction::FSub:
1422 case Instruction::FMul:
1423 case Instruction::FDiv:
1424 case Instruction::ICmp:
1425 case Instruction::FCmp:
1426 return true;
1427 case Instruction::Shl:
1428 case Instruction::LShr:
1429 case Instruction::AShr:
1430 case Instruction::UDiv:
1431 case Instruction::SDiv:
1432 case Instruction::URem:
1433 case Instruction::SRem:
1434 return Operand == 1;
1435 default:
1436 return false;
1437 }
1438}
1439
1440
1442 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1443 return false;
1444
1445 if (canSplatOperand(I->getOpcode(), Operand))
1446 return true;
1447
1448 auto *II = dyn_cast<IntrinsicInst>(I);
1449 if (!II)
1450 return false;
1451
1452 switch (II->getIntrinsicID()) {
1453 case Intrinsic::fma:
1454 case Intrinsic::vp_fma:
1455 return Operand == 0 || Operand == 1;
1456 case Intrinsic::vp_shl:
1457 case Intrinsic::vp_lshr:
1458 case Intrinsic::vp_ashr:
1459 case Intrinsic::vp_udiv:
1460 case Intrinsic::vp_sdiv:
1461 case Intrinsic::vp_urem:
1462 case Intrinsic::vp_srem:
1463 return Operand == 1;
1464 // These intrinsics are commutative.
1465 case Intrinsic::vp_add:
1466 case Intrinsic::vp_mul:
1467 case Intrinsic::vp_and:
1468 case Intrinsic::vp_or:
1469 case Intrinsic::vp_xor:
1470 case Intrinsic::vp_fadd:
1471 case Intrinsic::vp_fmul:
1472 // These intrinsics have 'vr' versions.
1473 case Intrinsic::vp_sub:
1474 case Intrinsic::vp_fsub:
1475 case Intrinsic::vp_fdiv:
1476 return Operand == 0 || Operand == 1;
1477 default:
1478 return false;
1479 }
1480}
1481
1482/// Check if sinking \p I's operands to I's basic block is profitable, because
1483/// the operands can be folded into a target instruction, e.g.
1484/// splats of scalars can fold into vector instructions.
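/// For example, if a scalar splat defined in another block feeds a vector add,
/// sinking the splat next to the add lets isel fold it and select vadd.vx (or
/// vfadd.vf for floating point) instead of materializing the splat in a vector
/// register.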
1486 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1487 using namespace llvm::PatternMatch;
1488
1489 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1490 return false;
1491
1492 for (auto OpIdx : enumerate(I->operands())) {
1493 if (!canSplatOperand(I, OpIdx.index()))
1494 continue;
1495
1496 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1497 // Make sure we are not already sinking this operand
1498 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1499 continue;
1500
1501 // We are looking for a splat that can be sunk.
1503 m_Undef(), m_ZeroMask())))
1504 continue;
1505
1506 // All uses of the shuffle should be sunk to avoid duplicating it across
1507 // GPR and vector registers.
1508 for (Use &U : Op->uses()) {
1509 Instruction *Insn = cast<Instruction>(U.getUser());
1510 if (!canSplatOperand(Insn, U.getOperandNo()))
1511 return false;
1512 }
1513
1514 Ops.push_back(&Op->getOperandUse(0));
1515 Ops.push_back(&OpIdx.value());
1516 }
1517 return true;
1518}
1519
1521 unsigned Opc = VecOp.getOpcode();
1522
1523 // Assume target opcodes can't be scalarized.
1524 // TODO - do we have any exceptions?
1525 if (Opc >= ISD::BUILTIN_OP_END)
1526 return false;
1527
1528 // If the vector op is not supported, try to convert to scalar.
1529 EVT VecVT = VecOp.getValueType();
1530 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1531 return true;
1532
1533 // If the vector op is supported, but the scalar op is not, the transform may
1534 // not be worthwhile.
1535 EVT ScalarVT = VecVT.getScalarType();
1536 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
1537}
1538
1540 const GlobalAddressSDNode *GA) const {
1541 // In order to maximise the opportunity for common subexpression elimination,
1542 // keep a separate ADD node for the global address offset instead of folding
1543 // it in the global address node. Later peephole optimisations may choose to
1544 // fold it back in when profitable.
1545 return false;
1546}
1547
1548// Returns 0-31 if the fli instruction is available for the type and this is a
1549// legal FP immediate for the type. Returns -1 otherwise.
1551 if (!Subtarget.hasStdExtZfa())
1552 return -1;
1553
1554 bool IsSupportedVT = false;
1555 if (VT == MVT::f16) {
1556 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
1557 } else if (VT == MVT::f32) {
1558 IsSupportedVT = true;
1559 } else if (VT == MVT::f64) {
1560 assert(Subtarget.hasStdExtD() && "Expect D extension");
1561 IsSupportedVT = true;
1562 }
1563
1564 if (!IsSupportedVT)
1565 return -1;
1566
1567 return RISCVLoadFPImm::getLoadFPImm(Imm);
1568}
1569
1571 bool ForCodeSize) const {
1572 bool IsLegalVT = false;
1573 if (VT == MVT::f16)
1574 IsLegalVT = Subtarget.hasStdExtZfhOrZfhmin();
1575 else if (VT == MVT::f32)
1576 IsLegalVT = Subtarget.hasStdExtF();
1577 else if (VT == MVT::f64)
1578 IsLegalVT = Subtarget.hasStdExtD();
1579
1580 if (!IsLegalVT)
1581 return false;
1582
1583 if (getLegalZfaFPImm(Imm, VT) >= 0)
1584 return true;
1585
1586 // Cannot create a 64 bit floating-point immediate value for rv32.
1587 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
1588 // td can handle +0.0 or -0.0 already.
1589 // -0.0 can be created by fmv + fneg.
1590 return Imm.isZero();
1591 }
1592 // Special case: the cost for -0.0 is 1.
1593 int Cost = Imm.isNegZero()
1594 ? 1
1595 : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
1596 Subtarget.getXLen(),
1597 Subtarget.getFeatureBits());
1598 // If the constantpool data is already in cache, only Cost 1 is cheaper.
1599 return Cost < FPImmCost;
1600}
1601
1602// TODO: This is very conservative.
1604 unsigned Index) const {
1606 return false;
1607
1608 // Only support extracting a fixed from a fixed vector for now.
1609 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
1610 return false;
1611
1612 unsigned ResElts = ResVT.getVectorNumElements();
1613 unsigned SrcElts = SrcVT.getVectorNumElements();
1614
1615 // Conservatively only handle extracting half of a vector.
1616 // TODO: Relax this.
1617 if ((ResElts * 2) != SrcElts)
1618 return false;
1619
1620 // The smallest type we can slide is i8.
1621 // TODO: We can extract index 0 from a mask vector without a slide.
1622 if (ResVT.getVectorElementType() == MVT::i1)
1623 return false;
1624
1625 // Slide can support arbitrary index, but we only treat vslidedown.vi as
1626 // cheap.
1627 if (Index >= 32)
1628 return false;
1629
1630 // TODO: We can do arbitrary slidedowns, but for now only support extracting
1631 // the upper half of a vector until we have more test coverage.
1632 return Index == 0 || Index == ResElts;
1633}
1634
1637 EVT VT) const {
1638 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1639 // We might still end up using a GPR but that will be decided based on ABI.
1640 if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
1641 !Subtarget.hasStdExtZfhOrZfhmin())
1642 return MVT::f32;
1643
1645}
1646
1649 EVT VT) const {
1650 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1651 // We might still end up using a GPR but that will be decided based on ABI.
1652 if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
1653 !Subtarget.hasStdExtZfhOrZfhmin())
1654 return 1;
1655
1657}
1658
1659// Changes the condition code and swaps operands if necessary, so the SetCC
1660// operation matches one of the comparisons supported directly by branches
1661// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1662// with 1/-1.
1663static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1664 ISD::CondCode &CC, SelectionDAG &DAG) {
1665 // If this is a single bit test that can't be handled by ANDI, shift the
1666 // bit to be tested to the MSB and perform a signed compare with 0.
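  // For example, on RV64 a test of (X & 0x1000) != 0 becomes, roughly,
  // (X << 51) < 0, while a mask test such as (X & 0xFFFFF) == 0 becomes
  // (X << 44) == 0.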
1667 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1668 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1669 isa<ConstantSDNode>(LHS.getOperand(1))) {
1670 uint64_t Mask = LHS.getConstantOperandVal(1);
1671 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1672 unsigned ShAmt = 0;
1673 if (isPowerOf2_64(Mask)) {
1675 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1676 } else {
1677 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1678 }
1679
1680 LHS = LHS.getOperand(0);
1681 if (ShAmt != 0)
1682 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1683 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1684 return;
1685 }
1686 }
1687
1688 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1689 int64_t C = RHSC->getSExtValue();
1690 switch (CC) {
1691 default: break;
1692 case ISD::SETGT:
1693 // Convert X > -1 to X >= 0.
1694 if (C == -1) {
1695 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1696 CC = ISD::SETGE;
1697 return;
1698 }
1699 break;
1700 case ISD::SETLT:
1701 // Convert X < 1 to 0 <= X.
1702 if (C == 1) {
1703 RHS = LHS;
1704 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1705 CC = ISD::SETGE;
1706 return;
1707 }
1708 break;
1709 }
1710 }
1711
1712 switch (CC) {
1713 default:
1714 break;
1715 case ISD::SETGT:
1716 case ISD::SETLE:
1717 case ISD::SETUGT:
1718 case ISD::SETULE:
1720 std::swap(LHS, RHS);
1721 break;
1722 }
1723}
1724
1726 assert(VT.isScalableVector() && "Expecting a scalable vector type");
1727 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1728 if (VT.getVectorElementType() == MVT::i1)
1729 KnownSize *= 8;
1730
1731 switch (KnownSize) {
1732 default:
1733 llvm_unreachable("Invalid LMUL.");
1734 case 8:
1736 case 16:
1738 case 32:
1740 case 64:
1742 case 128:
1744 case 256:
1746 case 512:
1748 }
1749}
1750
1752 switch (LMul) {
1753 default:
1754 llvm_unreachable("Invalid LMUL.");
1759 return RISCV::VRRegClassID;
1761 return RISCV::VRM2RegClassID;
1763 return RISCV::VRM4RegClassID;
1765 return RISCV::VRM8RegClassID;
1766 }
1767}
1768
1770 RISCVII::VLMUL LMUL = getLMUL(VT);
1771 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1772 LMUL == RISCVII::VLMUL::LMUL_F4 ||
1773 LMUL == RISCVII::VLMUL::LMUL_F2 ||
1774 LMUL == RISCVII::VLMUL::LMUL_1) {
1775 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1776 "Unexpected subreg numbering");
1777 return RISCV::sub_vrm1_0 + Index;
1778 }
1779 if (LMUL == RISCVII::VLMUL::LMUL_2) {
1780 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1781 "Unexpected subreg numbering");
1782 return RISCV::sub_vrm2_0 + Index;
1783 }
1784 if (LMUL == RISCVII::VLMUL::LMUL_4) {
1785 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1786 "Unexpected subreg numbering");
1787 return RISCV::sub_vrm4_0 + Index;
1788 }
1789 llvm_unreachable("Invalid vector type.");
1790}
1791
1793 if (VT.getVectorElementType() == MVT::i1)
1794 return RISCV::VRRegClassID;
1795 return getRegClassIDForLMUL(getLMUL(VT));
1796}
1797
1798// Attempt to decompose a subvector insert/extract between VecVT and
1799// SubVecVT via subregister indices. Returns the subregister index that
1800// can perform the subvector insert/extract with the given element index, as
1801// well as the index corresponding to any leftover subvectors that must be
1802// further inserted/extracted within the register class for SubVecVT.
1803std::pair<unsigned, unsigned>
1805 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1806 const RISCVRegisterInfo *TRI) {
1807 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1808 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1809 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1810 "Register classes not ordered");
1811 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1812 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1813 // Try to compose a subregister index that takes us from the incoming
1814 // LMUL>1 register class down to the outgoing one. At each step we half
1815 // the LMUL:
1816 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1817 // Note that this is not guaranteed to find a subregister index, such as
1818 // when we are extracting from one VR type to another.
1819 unsigned SubRegIdx = RISCV::NoSubRegister;
1820 for (const unsigned RCID :
1821 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1822 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1823 VecVT = VecVT.getHalfNumVectorElementsVT();
1824 bool IsHi =
1825 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1826 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1827 getSubregIndexByMVT(VecVT, IsHi));
1828 if (IsHi)
1829 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1830 }
1831 return {SubRegIdx, InsertExtractIdx};
1832}
1833
1834// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1835// stores for those types.
1836bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1837 return !Subtarget.useRVVForFixedLengthVectors() ||
1839}
1840
1842 if (ScalarTy->isPointerTy())
1843 return true;
1844
1845 if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1846 ScalarTy->isIntegerTy(32))
1847 return true;
1848
1849 if (ScalarTy->isIntegerTy(64))
1850 return Subtarget.hasVInstructionsI64();
1851
1852 if (ScalarTy->isHalfTy())
1853 return Subtarget.hasVInstructionsF16();
1854 if (ScalarTy->isFloatTy())
1855 return Subtarget.hasVInstructionsF32();
1856 if (ScalarTy->isDoubleTy())
1857 return Subtarget.hasVInstructionsF64();
1858
1859 return false;
1860}
1861
1862unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
1863 return NumRepeatedDivisors;
1864}
1865
1867 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1868 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1869 "Unexpected opcode");
1870 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1871 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1873 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1874 if (!II)
1875 return SDValue();
1876 return Op.getOperand(II->VLOperand + 1 + HasChain);
1877}
1878
1880 const RISCVSubtarget &Subtarget) {
1881 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1882 if (!Subtarget.useRVVForFixedLengthVectors())
1883 return false;
1884
1885 // We only support a set of vector types with a consistent maximum fixed size
1886 // across all supported vector element types to avoid legalization issues.
1887 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1888 // fixed-length vector type we support is 1024 bytes.
1889 if (VT.getFixedSizeInBits() > 1024 * 8)
1890 return false;
1891
1892 unsigned MinVLen = Subtarget.getRealMinVLen();
1893
1894 MVT EltVT = VT.getVectorElementType();
1895
1896 // Don't use RVV for vectors we cannot scalarize if required.
1897 switch (EltVT.SimpleTy) {
1898 // i1 is supported but has different rules.
1899 default:
1900 return false;
1901 case MVT::i1:
1902 // Masks can only use a single register.
1903 if (VT.getVectorNumElements() > MinVLen)
1904 return false;
1905 MinVLen /= 8;
1906 break;
1907 case MVT::i8:
1908 case MVT::i16:
1909 case MVT::i32:
1910 break;
1911 case MVT::i64:
1912 if (!Subtarget.hasVInstructionsI64())
1913 return false;
1914 break;
1915 case MVT::f16:
1916 if (!Subtarget.hasVInstructionsF16())
1917 return false;
1918 break;
1919 case MVT::f32:
1920 if (!Subtarget.hasVInstructionsF32())
1921 return false;
1922 break;
1923 case MVT::f64:
1924 if (!Subtarget.hasVInstructionsF64())
1925 return false;
1926 break;
1927 }
1928
1929 // Reject elements larger than ELEN.
1930 if (EltVT.getSizeInBits() > Subtarget.getELEN())
1931 return false;
1932
1933 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1934 // Don't use RVV for types that don't fit.
1935 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1936 return false;
1937
1938 // TODO: Perhaps an artificial restriction, but worth having whilst getting
1939 // the base fixed length RVV support in place.
1940 if (!VT.isPow2VectorType())
1941 return false;
1942
1943 return true;
1944}
1945
1946bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1947 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1948}
1949
1950// Return the largest legal scalable vector type that matches VT's element type.
1952 const RISCVSubtarget &Subtarget) {
1953 // This may be called before legal types are set up.
1954 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1955 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1956 "Expected legal fixed length vector!");
1957
1958 unsigned MinVLen = Subtarget.getRealMinVLen();
1959 unsigned MaxELen = Subtarget.getELEN();
1960
1961 MVT EltVT = VT.getVectorElementType();
1962 switch (EltVT.SimpleTy) {
1963 default:
1964 llvm_unreachable("unexpected element type for RVV container");
1965 case MVT::i1:
1966 case MVT::i8:
1967 case MVT::i16:
1968 case MVT::i32:
1969 case MVT::i64:
1970 case MVT::f16:
1971 case MVT::f32:
1972 case MVT::f64: {
1973 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1974 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1975 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
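  // For example, with a minimum VLEN of 128, a 128-bit fixed vector such as
  // v8i16 maps to nxv4i16 (one whole register, LMUL=1), while the 64-bit
  // v4i16 maps to nxv2i16 (a fractional LMUL of 1/2).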
1976 unsigned NumElts =
1978 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1979 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1980 return MVT::getScalableVectorVT(EltVT, NumElts);
1981 }
1982 }
1983}
1984
1986 const RISCVSubtarget &Subtarget) {
1988 Subtarget);
1989}
1990
1992 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
1993}
1994
1995// Grow V to consume an entire RVV register.
1997 const RISCVSubtarget &Subtarget) {
1998 assert(VT.isScalableVector() &&
1999 "Expected to convert into a scalable vector!");
2000 assert(V.getValueType().isFixedLengthVector() &&
2001 "Expected a fixed length vector operand!");
2002 SDLoc DL(V);
2003 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2004 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2005}
2006
2007// Shrink V so it's just big enough to maintain a VT's worth of data.
2009 const RISCVSubtarget &Subtarget) {
2011 "Expected to convert into a fixed length vector!");
2012 assert(V.getValueType().isScalableVector() &&
2013 "Expected a scalable vector operand!");
2014 SDLoc DL(V);
2015 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2016 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2017}
2018
2019/// Return the mask type suitable for masking the provided
2020/// vector type. This is simply an i1 element type vector of the same
2021/// (possibly scalable) length.
2022static MVT getMaskTypeFor(MVT VecVT) {
2023 assert(VecVT.isVector());
2025 return MVT::getVectorVT(MVT::i1, EC);
2026}
2027
2028/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2029/// vector length VL.
2031 SelectionDAG &DAG) {
2032 MVT MaskVT = getMaskTypeFor(VecVT);
2033 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2034}
2035
2037 const RISCVSubtarget &Subtarget) {
2038 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2039}
2040
2041static std::pair<SDValue, SDValue>
2042getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
2043 const RISCVSubtarget &Subtarget) {
2044 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2045 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2046 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2047 return {Mask, VL};
2048}
2049
2050// Gets the two common "VL" operands: an all-ones mask and the vector length.
2051// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2052// the vector type that the fixed-length vector is contained in. Otherwise if
2053// VecVT is scalable, then ContainerVT should be the same as VecVT.
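// For a fixed-length vector the VL operand is a constant equal to its element
// count; for a scalable type the VL operand is the X0 register, which the
// backend interprets as VLMAX.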
2054static std::pair<SDValue, SDValue>
2055getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
2056 const RISCVSubtarget &Subtarget) {
2057 if (VecVT.isFixedLengthVector())
2058 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2059 Subtarget);
2060 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2061 MVT XLenVT = Subtarget.getXLenVT();
2062 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2063 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2064 return {Mask, VL};
2065}
2066
2067// As above but assuming the given type is a scalable vector type.
2068static std::pair<SDValue, SDValue>
2070 const RISCVSubtarget &Subtarget) {
2071 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2072 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2073}
2074
2076 SelectionDAG &DAG) const {
2077 assert(VecVT.isScalableVector() && "Expected scalable vector");
2078 unsigned MinElts = VecVT.getVectorMinNumElements();
2079 return DAG.getNode(ISD::VSCALE, DL, Subtarget.getXLenVT(),
2080 getVLOp(MinElts, DL, DAG, Subtarget));
2081}
2082
2083// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
2084// little of either is (currently) supported. This can get us into an infinite loop
2085// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2086// as a ..., etc.
2087// Until either (or both) of these can reliably lower any node, reporting that
2088// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2089// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2090// which is not desirable.
2092 EVT VT, unsigned DefinedValues) const {
2093 return false;
2094}
2095
2097 const RISCVSubtarget &Subtarget) {
2098 // RISCV FP-to-int conversions saturate to the destination register size, but
2099 // don't produce 0 for nan. We can use a conversion instruction and fix the
2100 // nan case with a compare and a select.
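  // For example, a scalar f32 -> i32 fptosi.sat becomes, roughly, an fcvt.w.s
  // (which already clamps out-of-range inputs to the integer min/max) followed
  // by an feq.s-based NaN check and a select that forces the NaN result to 0.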
2101 SDValue Src = Op.getOperand(0);
2102
2103 MVT DstVT = Op.getSimpleValueType();
2104 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2105
2106 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2107
2108 if (!DstVT.isVector()) {
2109 // In the absence of Zfh, promote f16 to f32, then saturate the result.
2110 if (Src.getSimpleValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {
2111 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2112 }
2113
2114 unsigned Opc;
2115 if (SatVT == DstVT)
2116 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2117 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2119 else
2120 return SDValue();
2121 // FIXME: Support other SatVTs by clamping before or after the conversion.
2122
2123 SDLoc DL(Op);
2124 SDValue FpToInt = DAG.getNode(
2125 Opc, DL, DstVT, Src,
2127
2128 if (Opc == RISCVISD::FCVT_WU_RV64)
2129 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2130
2131 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2132 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2134 }
2135
2136 // Vectors.
2137
2138 MVT DstEltVT = DstVT.getVectorElementType();
2139 MVT SrcVT = Src.getSimpleValueType();
2140 MVT SrcEltVT = SrcVT.getVectorElementType();
2141 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2142 unsigned DstEltSize = DstEltVT.getSizeInBits();
2143
2144 // Only handle saturating to the destination type.
2145 if (SatVT != DstEltVT)
2146 return SDValue();
2147
2148 // FIXME: Don't support narrowing by more than 1 step for now.
2149 if (SrcEltSize > (2 * DstEltSize))
2150 return SDValue();
2151
2152 MVT DstContainerVT = DstVT;
2153 MVT SrcContainerVT = SrcVT;
2154 if (DstVT.isFixedLengthVector()) {
2155 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2156 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2157 assert(DstContainerVT.getVectorElementCount() ==
2158 SrcContainerVT.getVectorElementCount() &&
2159 "Expected same element count");
2160 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2161 }
2162
2163 SDLoc DL(Op);
2164
2165 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2166
2167 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2168 {Src, Src, DAG.getCondCode(ISD::SETNE),
2169 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2170
2171 // If we need to widen by more than 1 step, promote the FP type, then do a
2172 // widening convert.
2173 if (DstEltSize > (2 * SrcEltSize)) {
2174 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2175 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2176 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2177 }
2178
2179 unsigned RVVOpc =
2181 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2182
2183 SDValue SplatZero = DAG.getNode(
2184 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2185 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2186 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2187 Res, VL);
2188
2189 if (DstVT.isFixedLengthVector())
2190 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2191
2192 return Res;
2193}
2194
2196 switch (Opc) {
2197 case ISD::FROUNDEVEN:
2198 case ISD::VP_FROUNDEVEN:
2199 return RISCVFPRndMode::RNE;
2200 case ISD::FTRUNC:
2201 case ISD::VP_FROUNDTOZERO:
2202 return RISCVFPRndMode::RTZ;
2203 case ISD::FFLOOR:
2204 case ISD::VP_FFLOOR:
2205 return RISCVFPRndMode::RDN;
2206 case ISD::FCEIL:
2207 case ISD::VP_FCEIL:
2208 return RISCVFPRndMode::RUP;
2209 case ISD::FROUND:
2210 case ISD::VP_FROUND:
2211 return RISCVFPRndMode::RMM;
2212 case ISD::FRINT:
2213 return RISCVFPRndMode::DYN;
2214 }
2215
2217}
2218
2219// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2220// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2221// the integer domain and back. Taking care to avoid converting values that are
2222// nan or already correct.
2223static SDValue
2225 const RISCVSubtarget &Subtarget) {
2226 MVT VT = Op.getSimpleValueType();
2227 assert(VT.isVector() && "Unexpected type");
2228
2229 SDLoc DL(Op);
2230
2231 SDValue Src = Op.getOperand(0);
2232
2233 MVT ContainerVT = VT;
2234 if (VT.isFixedLengthVector()) {
2235 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2236 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2237 }
2238
2239 SDValue Mask, VL;
2240 if (Op->isVPOpcode()) {
2241 Mask = Op.getOperand(1);
2242 VL = Op.getOperand(2);
2243 } else {
2244 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2245 }
2246
2247 // Freeze the source since we are increasing the number of uses.
2248 Src = DAG.getFreeze(Src);
2249
2250 // We do the conversion on the absolute value and fix the sign at the end.
2251 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2252
2253 // Determine the largest integer that can be represented exactly. This and
2254 // values larger than it don't have any fractional bits so don't need to
2255 // be converted.
2256 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2257 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2258 APFloat MaxVal = APFloat(FltSem);
2259 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2260 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2261 SDValue MaxValNode =
2262 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2263 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2264 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2265
2266 // If abs(Src) was larger than MaxVal or nan, keep it.
2267 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2268 Mask =
2269 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2270 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2271 Mask, Mask, VL});
2272
2273 // Truncate to integer and convert back to FP.
2274 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2275 MVT XLenVT = Subtarget.getXLenVT();
2276 SDValue Truncated;
2277
2278 switch (Op.getOpcode()) {
2279 default:
2280 llvm_unreachable("Unexpected opcode");
2281 case ISD::FCEIL:
2282 case ISD::VP_FCEIL:
2283 case ISD::FFLOOR:
2284 case ISD::VP_FFLOOR:
2285 case ISD::FROUND:
2286 case ISD::FROUNDEVEN:
2287 case ISD::VP_FROUND:
2288 case ISD::VP_FROUNDEVEN:
2289 case ISD::VP_FROUNDTOZERO: {
2290 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2291 assert(FRM != RISCVFPRndMode::Invalid);
2292 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2293 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2294 break;
2295 }
2296 case ISD::FTRUNC:
2297 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2298 Mask, VL);
2299 break;
2300 case ISD::VP_FRINT:
2301 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2302 break;
2303 case ISD::VP_FNEARBYINT:
2304 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2305 Mask, VL);
2306 break;
2307 }
2308
2309 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2310 if (Op.getOpcode() != ISD::VP_FNEARBYINT)
2311 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2312 Mask, VL);
2313
2314 // Restore the original sign so that -0.0 is preserved.
2315 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2316 Src, Src, Mask, VL);
2317
2318 if (!VT.isFixedLengthVector())
2319 return Truncated;
2320
2321 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2322}
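// Worked example (illustrative): lowering ISD::FFLOOR on a v4f32 value
// <2.5, -0.25, nan, 1e30>:
//  - |2.5| and |-0.25| are below 2^23, so those lanes are converted to integer
//    with RDN and back, giving 2.0 and -1.0; the final copysign restores the
//    sign, so e.g. a -0.0 input stays -0.0.
//  - the nan lane fails the ordered SETOLT compare, is masked off, and passes
//    through unchanged.
//  - 1e30 is >= 2^23, already has no fractional bits, and is likewise left alone.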
2323
2324static SDValue
2325 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2326 const RISCVSubtarget &Subtarget) {
2327 MVT VT = Op.getSimpleValueType();
2328 if (VT.isVector())
2329 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2330
2331 if (DAG.shouldOptForSize())
2332 return SDValue();
2333
2334 SDLoc DL(Op);
2335 SDValue Src = Op.getOperand(0);
2336
2337 // Create an integer the size of the mantissa with the MSB set. This and all
2338 // values larger than it don't have any fractional bits so don't need to be
2339 // converted.
2340 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2341 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2342 APFloat MaxVal = APFloat(FltSem);
2343 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2344 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2345 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2346
2347 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2348 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2349 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2350}
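// Illustrative note: for f64 the mantissa precision is 53 bits, so MaxValNode
// above is 2^52 and any |x| >= 2^52 already has no fractional part. The
// RISCVISD::FROUND node built here is expanded later in the backend, roughly
// as: |x| < 2^52 ? copysign(int->fp(fp->int(x, rm)), x) : x, with rm taken
// from matchRoundingOp.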
2351
2352 struct VIDSequence {
2353 int64_t StepNumerator;
2354 unsigned StepDenominator;
2355 int64_t Addend;
2356};
2357
2358static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2359 uint32_t BitWidth) {
2360 APSInt ValInt(BitWidth, !APF.isNegative());
2361 // We use an arbitrary rounding mode here. If a floating-point value is an
2362 // exact integer (e.g., 1.0), the rounding mode does not affect the output
2363 // value. If the rounding mode changes the output value, then it is not an
2364 // exact integer.
2365 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
2366 bool IsExact;
2367 // If it is out of signed integer range, it will return an invalid operation.
2368 // If it is not an exact integer, IsExact is false.
2369 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2370 APFloatBase::opInvalidOp) ||
2371 !IsExact)
2372 return std::nullopt;
2373 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
2374}
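// Examples (illustrative): getExactInteger(2.0, 16) yields 2 and
// getExactInteger(-1.0, 16) yields 0xFFFF (the two's complement bit pattern),
// while 2.5 or a value outside the signed 16-bit range yields std::nullopt.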
2375
2376// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2377 // to the (non-zero) step S and start value X. This can then be lowered as the
2378// RVV sequence (VID * S) + X, for example.
2379// The step S is represented as an integer numerator divided by a positive
2380// denominator. Note that the implementation currently only identifies
2381// sequences in which either the numerator is +/- 1 or the denominator is 1. It
2382// cannot detect 2/3, for example.
2383// Note that this method will also match potentially unappealing index
2384// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
2385// determine whether this is worth generating code for.
2386static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2387 unsigned NumElts = Op.getNumOperands();
2388 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2389 bool IsInteger = Op.getValueType().isInteger();
2390
2391 std::optional<unsigned> SeqStepDenom;
2392 std::optional<int64_t> SeqStepNum, SeqAddend;
2393 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2394 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2395 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2396 // Assume undef elements match the sequence; we just have to be careful
2397 // when interpolating across them.
2398 if (Op.getOperand(Idx).isUndef())
2399 continue;
2400
2401 uint64_t Val;
2402 if (IsInteger) {
2403 // The BUILD_VECTOR must be all constants.
2404 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2405 return std::nullopt;
2406 Val = Op.getConstantOperandVal(Idx) &
2407 maskTrailingOnes<uint64_t>(EltSizeInBits);
2408 } else {
2409 // The BUILD_VECTOR must be all constants.
2410 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2411 return std::nullopt;
2412 if (auto ExactInteger = getExactInteger(
2413 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2414 EltSizeInBits))
2415 Val = *ExactInteger;
2416 else
2417 return std::nullopt;
2418 }
2419
2420 if (PrevElt) {
2421 // Calculate the step since the last non-undef element, and ensure
2422 // it's consistent across the entire sequence.
2423 unsigned IdxDiff = Idx - PrevElt->second;
2424 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2425
2426 // A zero value difference means that we're somewhere in the middle
2427 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2428 // step change before evaluating the sequence.
2429 if (ValDiff == 0)
2430 continue;
2431
2432 int64_t Remainder = ValDiff % IdxDiff;
2433 // Normalize the step if it's greater than 1.
2434 if (Remainder != ValDiff) {
2435 // The difference must cleanly divide the element span.
2436 if (Remainder != 0)
2437 return std::nullopt;
2438 ValDiff /= IdxDiff;
2439 IdxDiff = 1;
2440 }
2441
2442 if (!SeqStepNum)
2443 SeqStepNum = ValDiff;
2444 else if (ValDiff != SeqStepNum)
2445 return std::nullopt;
2446
2447 if (!SeqStepDenom)
2448 SeqStepDenom = IdxDiff;
2449 else if (IdxDiff != *SeqStepDenom)
2450 return std::nullopt;
2451 }
2452
2453 // Record this non-undef element for later.
2454 if (!PrevElt || PrevElt->first != Val)
2455 PrevElt = std::make_pair(Val, Idx);
2456 }
2457
2458 // We need to have logged a step for this to count as a legal index sequence.
2459 if (!SeqStepNum || !SeqStepDenom)
2460 return std::nullopt;
2461
2462 // Loop back through the sequence and validate elements we might have skipped
2463 // while waiting for a valid step. While doing this, log any sequence addend.
2464 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2465 if (Op.getOperand(Idx).isUndef())
2466 continue;
2467 uint64_t Val;
2468 if (IsInteger) {
2469 Val = Op.getConstantOperandVal(Idx) &
2470 maskTrailingOnes<uint64_t>(EltSizeInBits);
2471 } else {
2472 Val = *getExactInteger(
2473 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2474 EltSizeInBits);
2475 }
2476 uint64_t ExpectedVal =
2477 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2478 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2479 if (!SeqAddend)
2480 SeqAddend = Addend;
2481 else if (Addend != SeqAddend)
2482 return std::nullopt;
2483 }
2484
2485 assert(SeqAddend && "Must have an addend if we have a step");
2486
2487 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2488}
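// Worked examples (illustrative): <0, 2, 4, 6> is matched as
// {StepNumerator=2, StepDenominator=1, Addend=0}, while <1, 1, 2, 2, 3, 3>
// is matched as {StepNumerator=1, StepDenominator=2, Addend=1}, i.e.
// (VID * 1) / 2 + 1. A sequence such as <0, 1, 3> has no consistent step and
// returns std::nullopt.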
2489
2490// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2491// and lower it as a VRGATHER_VX_VL from the source vector.
2492static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2493 SelectionDAG &DAG,
2494 const RISCVSubtarget &Subtarget) {
2495 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2496 return SDValue();
2497 SDValue Vec = SplatVal.getOperand(0);
2498 // Only perform this optimization on vectors of the same size for simplicity.
2499 // Don't perform this optimization for i1 vectors.
2500 // FIXME: Support i1 vectors, maybe by promoting to i8?
2501 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2502 return SDValue();
2503 SDValue Idx = SplatVal.getOperand(1);
2504 // The index must be a legal type.
2505 if (Idx.getValueType() != Subtarget.getXLenVT())
2506 return SDValue();
2507
2508 MVT ContainerVT = VT;
2509 if (VT.isFixedLengthVector()) {
2510 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2511 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2512 }
2513
2514 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2515
2516 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2517 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2518
2519 if (!VT.isFixedLengthVector())
2520 return Gather;
2521
2522 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2523}
2524
2525 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2526 const RISCVSubtarget &Subtarget) {
2527 MVT VT = Op.getSimpleValueType();
2528 assert(VT.isFixedLengthVector() && "Unexpected vector!");
2529
2530 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2531
2532 SDLoc DL(Op);
2533 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2534
2535 MVT XLenVT = Subtarget.getXLenVT();
2536 unsigned NumElts = Op.getNumOperands();
2537
2538 if (VT.getVectorElementType() == MVT::i1) {
2539 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2540 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2541 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2542 }
2543
2544 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2545 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2546 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
2547 }
2548
2549 // Lower constant mask BUILD_VECTORs via an integer vector type, in
2550 // scalar integer chunks whose bit-width depends on the number of mask
2551 // bits and XLEN.
2552 // First, determine the most appropriate scalar integer type to use. This
2553 // is at most XLenVT, but may be shrunk to a smaller vector element type
2554 // according to the size of the final vector - use i8 chunks rather than
2555 // XLenVT if we're producing a v8i1. This results in more consistent
2556 // codegen across RV32 and RV64.
2557 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
2558 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
2559 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2560 // If we have to use more than one INSERT_VECTOR_ELT then this
2561 // optimization is likely to increase code size; avoid performing it in
2562 // such a case. We can use a load from a constant pool in this case.
2563 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2564 return SDValue();
2565 // Now we can create our integer vector type. Note that it may be larger
2566 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2567 MVT IntegerViaVecVT =
2568 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2569 divideCeil(NumElts, NumViaIntegerBits));
2570
2571 uint64_t Bits = 0;
2572 unsigned BitPos = 0, IntegerEltIdx = 0;
2573 SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2574
2575 for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2576 // Once we accumulate enough bits to fill our scalar type, insert into
2577 // our vector and clear our accumulated data.
2578 if (I != 0 && I % NumViaIntegerBits == 0) {
2579 if (NumViaIntegerBits <= 32)
2580 Bits = SignExtend64<32>(Bits);
2581 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2582 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2583 Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2584 Bits = 0;
2585 BitPos = 0;
2586 IntegerEltIdx++;
2587 }
2588 SDValue V = Op.getOperand(I);
2589 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2590 Bits |= ((uint64_t)BitValue << BitPos);
2591 }
2592
2593 // Insert the (remaining) scalar value into position in our integer
2594 // vector type.
2595 if (NumViaIntegerBits <= 32)
2596 Bits = SignExtend64<32>(Bits);
2597 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2598 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2599 DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2600
2601 if (NumElts < NumViaIntegerBits) {
2602 // If we're producing a smaller vector than our minimum legal integer
2603 // type, bitcast to the equivalent (known-legal) mask type, and extract
2604 // our final mask.
2605 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2606 Vec = DAG.getBitcast(MVT::v8i1, Vec);
2607 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2608 DAG.getConstant(0, DL, XLenVT));
2609 } else {
2610 // Else we must have produced an integer type with the same size as the
2611 // mask type; bitcast for the final result.
2612 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2613 Vec = DAG.getBitcast(VT, Vec);
2614 }
2615
2616 return Vec;
2617 }
2618
2619 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2620 // vector type, we have a legal equivalently-sized i8 type, so we can use
2621 // that.
2622 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2623 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2624
2625 SDValue WideVec;
2626 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2627 // For a splat, perform a scalar truncate before creating the wider
2628 // vector.
2629 assert(Splat.getValueType() == XLenVT &&
2630 "Unexpected type for i1 splat value");
2631 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2632 DAG.getConstant(1, DL, XLenVT));
2633 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2634 } else {
2635 SmallVector<SDValue, 8> Ops(Op->op_values());
2636 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2637 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2638 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2639 }
2640
2641 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2642 }
2643
2644 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2645 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2646 return Gather;
2647 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2648 : RISCVISD::VMV_V_X_VL;
2649 Splat =
2650 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2651 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2652 }
2653
2654 // Try and match index sequences, which we can lower to the vid instruction
2655 // with optional modifications. An all-undef vector is matched by
2656 // getSplatValue, above.
2657 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2658 int64_t StepNumerator = SimpleVID->StepNumerator;
2659 unsigned StepDenominator = SimpleVID->StepDenominator;
2660 int64_t Addend = SimpleVID->Addend;
2661
2662 assert(StepNumerator != 0 && "Invalid step");
2663 bool Negate = false;
2664 int64_t SplatStepVal = StepNumerator;
2665 unsigned StepOpcode = ISD::MUL;
2666 if (StepNumerator != 1) {
2667 if (isPowerOf2_64(std::abs(StepNumerator))) {
2668 Negate = StepNumerator < 0;
2669 StepOpcode = ISD::SHL;
2670 SplatStepVal = Log2_64(std::abs(StepNumerator));
2671 }
2672 }
2673
2674 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2675 // threshold since it's the immediate value many RVV instructions accept.
2676 // There is no vmul.vi instruction so ensure the multiply constant can fit in
2677 // a single addi instruction.
2678 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2679 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2680 isPowerOf2_32(StepDenominator) &&
2681 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2682 MVT VIDVT =
2683 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
2684 MVT VIDContainerVT =
2685 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
2686 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
2687 // Convert right out of the scalable type so we can use standard ISD
2688 // nodes for the rest of the computation. If we used scalable types with
2689 // these, we'd lose the fixed-length vector info and generate worse
2690 // vsetvli code.
2691 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
2692 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2693 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2694 SDValue SplatStep = DAG.getSplatBuildVector(
2695 VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2696 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
2697 }
2698 if (StepDenominator != 1) {
2699 SDValue SplatStep = DAG.getSplatBuildVector(
2700 VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2701 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
2702 }
2703 if (Addend != 0 || Negate) {
2704 SDValue SplatAddend = DAG.getSplatBuildVector(
2705 VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
2706 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
2707 VID);
2708 }
2709 if (VT.isFloatingPoint()) {
2710 // TODO: Use vfwcvt to reduce register pressure.
2711 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
2712 }
2713 return VID;
2714 }
2715 }
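// Illustrative codegen sketch (exact output depends on VL/LMUL): the VID path
// above turns BUILD_VECTOR <0, 2, 4, 6> into roughly "vid.v v8; vsll.vi v8, v8, 1"
// (the step of 2 becomes a shift), and <3, 4, 5, 6> into
// "vid.v v8; vadd.vi v8, v8, 3" (step 1 with addend 3).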
2716
2717 // Attempt to detect "hidden" splats, which only reveal themselves as splats
2718 // when re-interpreted as a vector with a larger element type. For example,
2719 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2720 // could be instead splat as
2721 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2722 // TODO: This optimization could also work on non-constant splats, but it
2723 // would require bit-manipulation instructions to construct the splat value.
2724 SmallVector<SDValue> Sequence;
2725 unsigned EltBitSize = VT.getScalarSizeInBits();
2726 const auto *BV = cast<BuildVectorSDNode>(Op);
2727 if (VT.isInteger() && EltBitSize < 64 &&
2728 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2729 BV->getRepeatedSequence(Sequence) &&
2730 (Sequence.size() * EltBitSize) <= 64) {
2731 unsigned SeqLen = Sequence.size();
2732 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2733 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2734 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2735 ViaIntVT == MVT::i64) &&
2736 "Unexpected sequence type");
2737
2738 unsigned EltIdx = 0;
2739 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2740 uint64_t SplatValue = 0;
2741 // Construct the amalgamated value which can be splatted as this larger
2742 // vector type.
2743 for (const auto &SeqV : Sequence) {
2744 if (!SeqV.isUndef())
2745 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2746 << (EltIdx * EltBitSize));
2747 EltIdx++;
2748 }
2749
2750 // On RV64, sign-extend from 32 to 64 bits where possible in order to
2751 // achieve better constant materialization.
2752 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2753 SplatValue = SignExtend64<32>(SplatValue);
2754
2755 // Since we can't introduce illegal i64 types at this stage, we can only
2756 // perform an i64 splat on RV32 if it is its own sign-extended value. That
2757 // way we can use RVV instructions to splat.
2758 assert((ViaIntVT.bitsLE(XLenVT) ||
2759 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2760 "Unexpected bitcast sequence");
2761 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2762 SDValue ViaVL =
2763 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2764 MVT ViaContainerVT =
2765 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2766 SDValue Splat =
2767 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2768 DAG.getUNDEF(ViaContainerVT),
2769 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2770 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2771 return DAG.getBitcast(VT, Splat);
2772 }
2773 }
2774
2775 // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2776 // which constitute a large proportion of the elements. In such cases we can
2777 // splat a vector with the dominant element and make up the shortfall with
2778 // INSERT_VECTOR_ELTs.
2779 // Note that this includes vectors of 2 elements by association. The
2780 // upper-most element is the "dominant" one, allowing us to use a splat to
2781 // "insert" the upper element, and an insert of the lower element at position
2782 // 0, which improves codegen.
2783 SDValue DominantValue;
2784 unsigned MostCommonCount = 0;
2785 DenseMap<SDValue, unsigned> ValueCounts;
2786 unsigned NumUndefElts =
2787 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2788
2789 // Track the number of scalar loads we know we'd be inserting, estimated as
2790 // any non-zero floating-point constant. Other kinds of elements are either
2791 // already in registers or are materialized on demand. The threshold at which
2792 // a vector load is more desirable than several scalar materializations and
2793 // vector-insertion instructions is not known.
2794 unsigned NumScalarLoads = 0;
2795
2796 for (SDValue V : Op->op_values()) {
2797 if (V.isUndef())
2798 continue;
2799
2800 ValueCounts.insert(std::make_pair(V, 0));
2801 unsigned &Count = ValueCounts[V];
2802
2803 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2804 NumScalarLoads += !CFP->isExactlyValue(+0.0);
2805
2806 // Is this value dominant? In case of a tie, prefer the highest element as
2807 // it's cheaper to insert near the beginning of a vector than it is at the
2808 // end.
2809 if (++Count >= MostCommonCount) {
2810 DominantValue = V;
2811 MostCommonCount = Count;
2812 }
2813 }
2814
2815 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2816 unsigned NumDefElts = NumElts - NumUndefElts;
2817 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2818
2819 // Don't perform this optimization when optimizing for size, since
2820 // materializing elements and inserting them tends to cause code bloat.
2821 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2822 ((MostCommonCount > DominantValueCountThreshold) ||
2823 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2824 // Start by splatting the most common element.
2825 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2826
2827 DenseSet<SDValue> Processed{DominantValue};
2828 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2829 for (const auto &OpIdx : enumerate(Op->ops())) {
2830 const SDValue &V = OpIdx.value();
2831 if (V.isUndef() || !Processed.insert(V).second)
2832 continue;
2833 if (ValueCounts[V] == 1) {
2834 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2835 DAG.getConstant(OpIdx.index(), DL, XLenVT));
2836 } else {
2837 // Blend in all instances of this value using a VSELECT, using a
2838 // mask where each bit signals whether that element is the one
2839 // we're after.
2840 SmallVector<SDValue> Ops;
2841 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2842 return DAG.getConstant(V == V1, DL, XLenVT);
2843 });
2844 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2845 DAG.getBuildVector(SelMaskTy, DL, Ops),
2846 DAG.getSplatBuildVector(VT, DL, V), Vec);
2847 }
2848 }
2849
2850 return Vec;
2851 }
2852
2853 return SDValue();
2854}
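// Illustrative example for the dominant-value path above: v4i32 <7, 7, 7, 3>
// is lowered by splatting 7 and then inserting 3 at index 3, rather than
// materializing four separate elements.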
2855
2856static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2857 SDValue Lo, SDValue Hi, SDValue VL,
2858 SelectionDAG &DAG) {
2859 if (!Passthru)
2860 Passthru = DAG.getUNDEF(VT);
2861 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2862 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2863 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2864 // If the Hi constant is just the sign bits of Lo (a sign-extended i32), lower
2865 // this as a custom node in order to try and match RVV vector/scalar instructions.
2866 if ((LoC >> 31) == HiC)
2867 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2868
2869 // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
2870 // vmv.v.x whose EEW = 32 to lower it.
2871 auto *Const = dyn_cast<ConstantSDNode>(VL);
2872 if (LoC == HiC && Const && Const->isAllOnes()) {
2873 MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2874 // TODO: if vl <= min(VLMAX), we can also do this, but we cannot access the
2875 // Subtarget here.
2876 auto InterVec = DAG.getNode(
2877 RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2878 DAG.getRegister(RISCV::X0, MVT::i32));
2879 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2880 }
2881 }
2882
2883 // Fall back to a stack store and stride x0 vector load.
2884 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2885 Hi, VL);
2886}
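// Illustrative example: splatting the i64 constant -1 on RV32 has
// Lo == Hi == -1, and since Hi matches Lo's sign bits a single
// RISCVISD::VMV_V_X_VL of Lo suffices (vmv.v.x sign-extends the XLEN-sized
// scalar to SEW=64). A pair such as Lo=0, Hi=1 satisfies neither special case
// and falls back to SPLAT_VECTOR_SPLIT_I64_VL.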
2887
2888// Called by type legalization to handle splat of i64 on RV32.
2889// FIXME: We can optimize this when the type has sign or zero bits in one
2890// of the halves.
2891static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2892 SDValue Scalar, SDValue VL,
2893 SelectionDAG &DAG) {
2894 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2895 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2896 DAG.getConstant(0, DL, MVT::i32));
2897 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2898 DAG.getConstant(1, DL, MVT::i32));
2899 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2900}
2901
2902// This function lowers a splat of a scalar operand Splat with the vector
2903// length VL. It ensures the final sequence is type legal, which is useful when
2904// lowering a splat after type legalization.
2905static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2906 MVT VT, SDLoc DL, SelectionDAG &DAG,
2907 const RISCVSubtarget &Subtarget) {
2908 bool HasPassthru = Passthru && !Passthru.isUndef();
2909 if (!HasPassthru && !Passthru)
2910 Passthru = DAG.getUNDEF(VT);
2911 if (VT.isFloatingPoint()) {
2912 // If VL is 1, we could use vfmv.s.f.
2913 if (isOneConstant(VL))
2914 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2915 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2916 }
2917
2918 MVT XLenVT = Subtarget.getXLenVT();
2919
2920 // Simplest case is that the operand needs to be promoted to XLenVT.
2921 if (Scalar.getValueType().bitsLE(XLenVT)) {
2922 // If the operand is a constant, sign extend to increase our chances
2923 // of being able to use a .vi instruction. ANY_EXTEND would become a
2924 // zero extend and the simm5 check in isel would fail.
2925 // FIXME: Should we ignore the upper bits in isel instead?
2926 unsigned ExtOpc =
2927 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2928 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2929 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2930 // If VL is 1 and the scalar value won't benefit from immediate, we could
2931 // use vmv.s.x.
2932 if (isOneConstant(VL) &&
2933 (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2934 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2935 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2936 }
2937
2938 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2939 "Unexpected scalar for splat lowering!");
2940
2941 if (isOneConstant(VL) && isNullConstant(Scalar))
2942 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2943 DAG.getConstant(0, DL, XLenVT), VL);
2944
2945 // Otherwise use the more complicated splatting algorithm.
2946 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2947}
2948
2949static MVT getLMUL1VT(MVT VT) {
2951 "Unexpected vector MVT");
2955}
2956
2957// This function lowers an insert of a scalar operand Scalar into lane
2958// 0 of the vector regardless of the value of VL. The contents of the
2959// remaining lanes of the result vector are unspecified. VL is assumed
2960// to be non-zero.
2961 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
2962 MVT VT, SDLoc DL, SelectionDAG &DAG,
2963 const RISCVSubtarget &Subtarget) {
2964 const MVT XLenVT = Subtarget.getXLenVT();
2965
2966 SDValue Passthru = DAG.getUNDEF(VT);
2967 if (VT.isFloatingPoint()) {
2968 // TODO: Use vmv.v.i for appropriate constants
2969 // Use M1 or smaller to avoid over constraining register allocation
2970 const MVT M1VT = getLMUL1VT(VT);
2971 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
2972 SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
2973 DAG.getUNDEF(InnerVT), Scalar, VL);
2974 if (VT != InnerVT)
2975 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2976 DAG.getUNDEF(VT),
2977 Result, DAG.getConstant(0, DL, XLenVT));
2978 return Result;
2979 }
2980
2981
2982 // Avoid the tricky legalization cases by falling back to using the
2983 // splat code which already handles it gracefully.
2984 if (!Scalar.getValueType().bitsLE(XLenVT))
2985 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
2986 DAG.getConstant(1, DL, XLenVT),
2987 VT, DL, DAG, Subtarget);
2988
2989 // If the operand is a constant, sign extend to increase our chances
2990 // of being able to use a .vi instruction. ANY_EXTEND would become a
2991 // zero extend and the simm5 check in isel would fail.
2992 // FIXME: Should we ignore the upper bits in isel instead?
2993 unsigned ExtOpc =
2994 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2995 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2996 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
2997 // higher would involve overly constraining the register allocator for
2998 // no purpose.
2999 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
3000 if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
3001 VT.bitsLE(getLMUL1VT(VT)))
3002 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3003 }
3004 // Use M1 or smaller to avoid over constraining register allocation
3005 const MVT M1VT = getLMUL1VT(VT);
3006 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3007 SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
3008 DAG.getUNDEF(InnerVT), Scalar, VL);
3009 if (VT != InnerVT)
3010 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3011 DAG.getUNDEF(VT),
3012 Result, DAG.getConstant(0, DL, XLenVT));
3013 return Result;
3014
3015}
3016
3017 // Is this a shuffle that extracts either the even or the odd elements of a
3018 // vector? That is, specifically, either (a) or (b) below.
3019// t34: v8i8 = extract_subvector t11, Constant:i64<0>
3020// t33: v8i8 = extract_subvector t11, Constant:i64<8>
3021// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3022// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3023 // Returns true on success, with {Src Vector, Even Elements} implied by the inputs.
3024static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3025 SDValue V2, ArrayRef<int> Mask,
3026 const RISCVSubtarget &Subtarget) {
3027 // Need to be able to widen the vector.
3028 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3029 return false;
3030
3031 // Both inputs must be extracts.
3032 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3033 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3034 return false;
3035
3036 // Extracting from the same source.
3037 SDValue Src = V1.getOperand(0);
3038 if (Src != V2.getOperand(0))
3039 return false;
3040
3041 // Src needs to have twice the number of elements.
3042 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3043 return false;
3044
3045 // The extracts must extract the two halves of the source.
3046 if (V1.getConstantOperandVal(1) != 0 ||
3047 V2.getConstantOperandVal(1) != Mask.size())
3048 return false;
3049
3050 // First index must be the first even or odd element from V1.
3051 if (Mask[0] != 0 && Mask[0] != 1)
3052 return false;
3053
3054 // The others must increase by 2 each time.
3055 // TODO: Support undef elements?
3056 for (unsigned i = 1; i != Mask.size(); ++i)
3057 if (Mask[i] != Mask[i - 1] + 2)
3058 return false;
3059
3060 return true;
3061}
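// Illustrative example: with t11 a v16i8 source split into two v8i8 halves,
// the mask <0,2,4,6,8,10,12,14> selects the even elements of t11 and the mask
// <1,3,5,7,9,11,13,15> the odd ones; both satisfy the checks above and are
// later lowered via getDeinterleaveViaVNSRL.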
3062
3063/// Is this shuffle interleaving contiguous elements from one vector into the
3064/// even elements and contiguous elements from another vector into the odd
3065/// elements. \p Src1 will contain the element that should be in the first even
3066/// element. \p Src2 will contain the element that should be in the first odd
3067/// element. These can be the first element in a source or the element half
3068/// way through the source.
3069static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3070 int &OddSrc, const RISCVSubtarget &Subtarget) {
3071 // We need to be able to widen elements to the next larger integer type.
3072 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3073 return false;
3074
3075 int Size = Mask.size();
3076 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
3077
3078 SmallVector<unsigned, 2> StartIndexes;
3079 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3080 return false;
3081
3082 EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
3083 OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
3084
3085 // One source should be low half of first vector.
3086 if (EvenSrc != 0 && OddSrc != 0)
3087 return false;
3088
3089 return true;
3090}
3091
3092/// Match shuffles that concatenate two vectors, rotate the concatenation,
3093/// and then extract the original number of elements from the rotated result.
3094/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3095/// returned rotation amount is for a rotate right, where elements move from
3096/// higher elements to lower elements. \p LoSrc indicates the first source
3097/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3098/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3099/// 0 or 1 if a rotation is found.
3100///
3101/// NOTE: We talk about rotate to the right which matches how bit shift and
3102/// rotate instructions are described where LSBs are on the right, but LLVM IR
3103/// and the table below write vectors with the lowest elements on the left.
3104static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3105 int Size = Mask.size();
3106
3107 // We need to detect various ways of spelling a rotation:
3108 // [11, 12, 13, 14, 15, 0, 1, 2]
3109 // [-1, 12, 13, 14, -1, -1, 1, -1]
3110 // [-1, -1, -1, -1, -1, -1, 1, 2]
3111 // [ 3, 4, 5, 6, 7, 8, 9, 10]
3112 // [-1, 4, 5, 6, -1, -1, 9, -1]
3113 // [-1, 4, 5, 6, -1, -1, -1, -1]
3114 int Rotation = 0;
3115 LoSrc = -1;
3116 HiSrc = -1;
3117 for (int i = 0; i != Size; ++i) {
3118 int M = Mask[i];
3119 if (M < 0)
3120 continue;
3121
3122 // Determine where a rotate vector would have started.
3123 int StartIdx = i - (M % Size);
3124 // The identity rotation isn't interesting, stop.
3125 if (StartIdx == 0)
3126 return -1;
3127
3128 // If we found the tail of a vector the rotation must be the missing
3129 // front. If we found the head of a vector, it must be how much of the
3130 // head.
3131 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3132
3133 if (Rotation == 0)
3134 Rotation = CandidateRotation;
3135 else if (Rotation != CandidateRotation)
3136 // The rotations don't match, so we can't match this mask.
3137 return -1;
3138
3139 // Compute which value this mask is pointing at.
3140 int MaskSrc = M < Size ? 0 : 1;
3141
3142 // Compute which of the two target values this index should be assigned to.
3143 // This reflects whether the high elements are remaining or the low elements
3144 // are remaining.
3145 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3146
3147 // Either set up this value if we've not encountered it before, or check
3148 // that it remains consistent.
3149 if (TargetSrc < 0)
3150 TargetSrc = MaskSrc;
3151 else if (TargetSrc != MaskSrc)
3152 // This may be a rotation, but it pulls from the inputs in some
3153 // unsupported interleaving.
3154 return -1;
3155 }
3156
3157 // Check that we successfully analyzed the mask, and normalize the results.
3158 assert(Rotation != 0 && "Failed to locate a viable rotation!");
3159 assert((LoSrc >= 0 || HiSrc >= 0) &&
3160 "Failed to find a rotated input vector!");
3161
3162 return Rotation;
3163}
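// Worked example (illustrative): for Mask <3,4,5,6,7,8,9,10> with 8-element
// inputs, the function returns Rotation = 3 with HiSrc = 0 and LoSrc = 1: the
// caller slides V1 down by 3 and slides V2 up by 8 - 3 = 5, producing
// V1[3..7] followed by V2[0..2].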
3164
3165// Lower a deinterleave shuffle to vnsrl.
3166// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
3167// -> [p, q, r, s] (EvenElts == false)
3168// VT is the type of the vector to return, <[vscale x ]n x ty>
3169// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
3170 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
3171 bool EvenElts,
3172 const RISCVSubtarget &Subtarget,
3173 SelectionDAG &DAG) {
3174 // The result is a vector of type <m x n x ty>
3175 MVT ContainerVT = VT;
3176 // Convert fixed vectors to scalable if needed
3177 if (ContainerVT.isFixedLengthVector()) {
3178 assert(Src.getSimpleValueType().isFixedLengthVector());
3179 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
3180
3181 // The source is a vector of type <m x n*2 x ty>
3182 MVT SrcContainerVT =
3183 MVT::getVectorVT(ContainerVT.getVectorElementType(),
3184 ContainerVT.getVectorElementCount() * 2);
3185 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3186 }
3187
3188 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3189
3190 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
3191 // This also converts FP to int.
3192 unsigned EltBits = ContainerVT.getScalarSizeInBits();
3193 MVT WideSrcContainerVT = MVT::getVectorVT(
3194 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
3195 Src = DAG.getBitcast(WideSrcContainerVT, Src);
3196
3197 // The integer version of the container type.
3198 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
3199
3200 // If we want even elements, then the shift amount is 0. Otherwise, shift by
3201 // the original element size.
3202 unsigned Shift = EvenElts ? 0 : EltBits;
3203 SDValue SplatShift = DAG.getNode(
3204 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
3205 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
3206 SDValue Res =
3207 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
3208 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
3209 // Cast back to FP if needed.
3210 Res = DAG.getBitcast(ContainerVT, Res);
3211
3212 if (VT.isFixedLengthVector())
3213 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
3214 return Res;
3215}
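// Illustrative sketch: deinterleaving the even i16 elements of a pair-wise
// interleaved vector views it as i32 lanes and issues "vnsrl.wi vd, vs2, 0";
// the odd elements use a shift equal to the element width (16 here), which
// isel materializes as vnsrl.wi or vnsrl.wx depending on the immediate range.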
3216
3217static SDValue
3218 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3219 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3220 SDValue VL,
3221 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3222 if (Merge.isUndef())
3223 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3224 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3225 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3226 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3227}
3228
3229static SDValue
3230 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3231 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3232 SDValue VL,
3233 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3234 if (Merge.isUndef())
3235 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3236 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3237 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3238 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3239}
3240
3241// Lower the following shuffle to vslidedown.
3242// a)
3243// t49: v8i8 = extract_subvector t13, Constant:i64<0>
3244// t109: v8i8 = extract_subvector t13, Constant:i64<8>
3245// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
3246// b)
3247// t69: v16i16 = extract_subvector t68, Constant:i64<0>
3248// t23: v8i16 = extract_subvector t69, Constant:i64<0>
3249// t29: v4i16 = extract_subvector t23, Constant:i64<4>
3250// t26: v8i16 = extract_subvector t69, Constant:i64<8>
3251// t30: v4i16 = extract_subvector t26, Constant:i64<0>
3252// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
3253 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
3254 SDValue V1, SDValue V2,
3255 ArrayRef<int> Mask,
3256 const RISCVSubtarget &Subtarget,
3257 SelectionDAG &DAG) {
3258 auto findNonEXTRACT_SUBVECTORParent =
3259 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
3260 uint64_t Offset = 0;
3261 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
3262 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
3263 // a scalable vector. But we don't want to match the case.
3264 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
3265 Offset += Parent.getConstantOperandVal(1);
3266 Parent = Parent.getOperand(0);
3267 }
3268 return std::make_pair(Parent, Offset);
3269 };
3270
3271 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
3272 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
3273
3274 // Extracting from the same source.
3275 SDValue Src = V1Src;
3276 if (Src != V2Src)
3277 return SDValue();
3278
3279 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
3280 SmallVector<int, 16> NewMask(Mask);
3281 for (size_t i = 0; i != NewMask.size(); ++i) {
3282 if (NewMask[i] == -1)
3283 continue;
3284
3285 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3286 NewMask[i] = NewMask[i] + V1IndexOffset;
3287 } else {
3288 // Minus NewMask.size() is needed. Otherwise, the b case would be
3289 // <5,6,7,12> instead of <5,6,7,8>.
3290 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3291 }
3292 }
3293
3294 // First index must be known and non-zero. It will be used as the slidedown
3295 // amount.
3296 if (NewMask[0] <= 0)
3297 return SDValue();
3298
3299 // NewMask must also be contiguous (each index one greater than the previous).
3300 for (unsigned i = 1; i != NewMask.size(); ++i)
3301 if (NewMask[i - 1] + 1 != NewMask[i])
3302 return SDValue();
3303
3304 MVT XLenVT = Subtarget.getXLenVT();
3305 MVT SrcVT = Src.getSimpleValueType();
3306 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3307 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3308 SDValue Slidedown =
3309 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3310 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3311 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3312 return DAG.getNode(
3313 ISD::EXTRACT_SUBVECTOR, DL, VT,
3314 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3315 DAG.getConstant(0, DL, XLenVT));
3316}
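// Illustrative example for case a) above: the mask <1,2,3,4,5,6,7,8> over the
// two v8i8 halves of t13 is rewritten (via the EXTRACT_SUBVECTOR offsets) into
// the contiguous mask <1..8> of t13 itself, so the whole shuffle becomes a
// single slidedown by 1 on t13 followed by extracting the low v8i8.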
3317
3318// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
3319// to create an interleaved vector of <[vscale x] n*2 x ty>.
3320// This requires that the size of ty is less than the subtarget's maximum ELEN.
3321 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL,
3322 SelectionDAG &DAG,
3323 const RISCVSubtarget &Subtarget) {
3324 MVT VecVT = EvenV.getSimpleValueType();
3325 MVT VecContainerVT = VecVT; // <vscale x n x ty>
3326 // Convert fixed vectors to scalable if needed
3327 if (VecContainerVT.isFixedLengthVector()) {
3328 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
3329 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
3330 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
3331 }
3332
3333 assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
3334
3335 // We're working with a vector of the same size as the resulting
3336 // interleaved vector, but with half the number of elements and
3337 // twice the SEW (Hence the restriction on not using the maximum
3338 // ELEN)
3339 MVT WideVT =
3340 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
3341 VecVT.getVectorElementCount());
3342 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
3343 if (WideContainerVT.isFixedLengthVector())
3344 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
3345
3346 // Bitcast the input vectors to integers in case they are FP
3347 VecContainerVT = VecContainerVT.changeTypeToInteger();
3348 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
3349 OddV = DAG.getBitcast(VecContainerVT, OddV);
3350
3351 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
3352 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
3353
3354 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
3355 // vwaddu.vv
3356 SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
3357 EvenV, OddV, Passthru, Mask, VL);
3358
3359 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones
3360 SDValue AllOnesVec = DAG.getSplatVector(
3361 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
3362 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
3363 AllOnesVec, Passthru, Mask, VL);
3364
3365 // Add the two together so we get
3366 // (OddV * 0xff...ff) + (OddV + EvenV)
3367 // = (OddV * 0x100...00) + EvenV
3368 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
3369 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
3370 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
3371 OddsMul, Passthru, Mask, VL);
3372
3373 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
3374 MVT ResultContainerVT = MVT::getVectorVT(
3375 VecVT.getVectorElementType(), // Make sure to use original type
3376 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
3377 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
3378
3379 // Convert back to a fixed vector if needed
3380 MVT ResultVT =
3383 if (ResultVT.isFixedLengthVector())
3384 Interleaved =
3385 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
3386
3387 return Interleaved;
3388}
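// Illustrative arithmetic for the sequence above with 16-bit elements:
// vwaddu.vv computes Even + Odd in 32-bit lanes, and the vwmaccu.vx with the
// all-ones scalar adds Odd * (2^16 - 1), so each wide lane holds
// Even + Odd * 2^16, i.e. Odd in the high half and Even in the low half.
// Bitcasting back to 16-bit elements yields the interleaved result.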
3389
3390 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3391 const RISCVSubtarget &Subtarget) {
3392 SDValue V1 = Op.getOperand(0);
3393 SDValue V2 = Op.getOperand(1);
3394 SDLoc DL(Op);
3395 MVT XLenVT = Subtarget.getXLenVT();
3396 MVT VT = Op.getSimpleValueType();
3397 unsigned NumElts = VT.getVectorNumElements();
3398 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3399
3400 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3401
3402 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3403
3404 if (SVN->isSplat()) {
3405 const int Lane = SVN->getSplatIndex();
3406 if (Lane >= 0) {
3407 MVT SVT = VT.getVectorElementType();
3408
3409 // Turn splatted vector load into a strided load with an X0 stride.
3410 SDValue V = V1;
3411 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
3412 // with undef.
3413 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
3414 int Offset = Lane;
3415 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
3416 int OpElements =
3417 V.getOperand(0).getSimpleValueType().getVectorNumElements();
3418 V = V.getOperand(Offset / OpElements);
3419 Offset %= OpElements;
3420 }
3421
3422 // We need to ensure the load isn't atomic or volatile.
3423 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
3424 auto *Ld = cast<LoadSDNode>(V);
3425 Offset *= SVT.getStoreSize();
3426 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
3427 TypeSize::Fixed(Offset), DL);
3428
3429 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
3430 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
3431 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3432 SDValue IntID =
3433 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
3434 SDValue Ops[] = {Ld->getChain(),
3435 IntID,
3436 DAG.getUNDEF(ContainerVT),
3437 NewAddr,
3438 DAG.getRegister(RISCV::X0, XLenVT),
3439 VL};
3440 SDValue NewLoad = DAG.getMemIntrinsicNode(
3441 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
3442 DAG.getMachineFunction().getMachineMemOperand(
3443 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
3444 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
3445 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
3446 }
3447
3448 // Otherwise use a scalar load and splat. This will give the best
3449 // opportunity to fold a splat into the operation. ISel can turn it into
3450 // the x0 strided load if we aren't able to fold away the select.
3451 if (SVT.isFloatingPoint())
3452 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
3453 Ld->getPointerInfo().getWithOffset(Offset),
3454 Ld->getOriginalAlign(),
3455 Ld->getMemOperand()->getFlags());
3456 else
3457 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
3458 Ld->getPointerInfo().getWithOffset(Offset), SVT,
3459 Ld->getOriginalAlign(),
3460 Ld->getMemOperand()->getFlags());
3461 DAG.makeEquivalentMemoryOrdering(Ld, V);
3462
3463 unsigned Opc =
3464 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
3465 SDValue Splat =
3466 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
3467 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3468 }
3469
3470 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
3471 assert(Lane < (int)NumElts && "Unexpected lane!");
3472 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
3473 V1, DAG.getConstant(Lane, DL, XLenVT),
3474 DAG.getUNDEF(ContainerVT), TrueMask, VL);
3475 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3476 }
3477 }
3478
3479 ArrayRef<int> Mask = SVN->getMask();
3480
3481 if (SDValue V =
3482 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
3483 return V;
3484
3485 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
3486 // be undef which can be handled with a single SLIDEDOWN/UP.
3487 int LoSrc, HiSrc;
3488 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
3489 if (Rotation > 0) {
3490 SDValue LoV, HiV;
3491 if (LoSrc >= 0) {
3492 LoV = LoSrc == 0 ? V1 : V2;
3493 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
3494 }
3495 if (HiSrc >= 0) {
3496 HiV = HiSrc == 0 ? V1 : V2;
3497 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
3498 }
3499
3500 // We found a rotation. We need to slide HiV down by Rotation. Then we need
3501 // to slide LoV up by (NumElts - Rotation).
3502 unsigned InvRotate = NumElts - Rotation;
3503
3504 SDValue Res = DAG.getUNDEF(ContainerVT);
3505 if (HiV) {
3506 // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
3507 // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
3508 // causes multiple vsetvlis in some test cases such as lowering
3509 // reduce.mul
3510 SDValue DownVL = VL;
3511 if (LoV)
3512 DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
3513 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
3514 DAG.getConstant(Rotation, DL, XLenVT), TrueMask,
3515 DownVL);
3516 }
3517 if (LoV)
3518 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
3519 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
3520 RISCVII::TAIL_AGNOSTIC);
3521
3522 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3523 }
3524
3525 // If this is a deinterleave and we can widen the vector, then we can use
3526 // vnsrl to deinterleave.
3527 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
3528 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
3529 Subtarget, DAG);
3530 }
3531
3532 // Detect an interleave shuffle and lower to
3533 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
3534 int EvenSrc, OddSrc;
3535 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
3536 // Extract the halves of the vectors.
3537 MVT HalfVT = VT.getHalfNumVectorElementsVT();
3538
3539 int Size = Mask.size();
3540 SDValue EvenV, OddV;
3541 assert(EvenSrc >= 0 && "Undef source?");
3542 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
3543 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
3544 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
3545
3546 assert(OddSrc >= 0 && "Undef source?");
3547 OddV = (OddSrc / Size) == 0 ? V1 : V2;
3548 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
3549 DAG.getConstant(OddSrc % Size, DL, XLenVT));
3550
3551 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
3552 }
3553
3554 // Detect shuffles which can be re-expressed as vector selects; these are
3555 // shuffles in which each element in the destination is taken from an element
3556 // at the corresponding index in either source vectors.
3557 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
3558 int MaskIndex = MaskIdx.value();
3559 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
3560 });
3561
3562 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
3563
3564 SmallVector<SDValue> MaskVals;
3565 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
3566 // merged with a second vrgather.
3567 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
3568
3569 // By default we preserve the original operand order, and use a mask to
3570 // select LHS as true and RHS as false. However, since RVV vector selects may
3571 // feature splats but only on the LHS, we may choose to invert our mask and
3572 // instead select between RHS and LHS.
3573 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
3574 bool InvertMask = IsSelect == SwapOps;
3575
3576 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
3577 // half.
3578 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
3579
3580 // Now construct the mask that will be used by the vselect or blended
3581 // vrgather operation. For vrgathers, construct the appropriate indices into
3582 // each vector.
3583 for (int MaskIndex : Mask) {
3584 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
3585 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3586 if (!IsSelect) {
3587 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
3588 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
3589 ? DAG.getConstant(MaskIndex, DL, XLenVT)
3590 : DAG.getUNDEF(XLenVT));
3591 GatherIndicesRHS.push_back(
3592 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
3593 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
3594 if (IsLHSOrUndefIndex && MaskIndex >= 0)
3595 ++LHSIndexCounts[MaskIndex];
3596 if (!IsLHSOrUndefIndex)
3597 ++RHSIndexCounts[MaskIndex - NumElts];
3598 }
3599 }
3600
3601 if (SwapOps) {
3602 std::swap(V1, V2);
3603 std::swap(GatherIndicesLHS, GatherIndicesRHS);
3604 }
3605
3606 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
3607 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3608 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3609
3610 if (IsSelect)
3611 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
3612
3613 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
3614 // On such a large vector we're unable to use i8 as the index type.
3615 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
3616 // may involve vector splitting if we're already at LMUL=8, or our
3617 // user-supplied maximum fixed-length LMUL.
3618 return SDValue();
3619 }
3620
3621 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
3622 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
3623 MVT IndexVT = VT.changeTypeToInteger();
3624 // Since we can't introduce illegal index types at this stage, use i16 and
3625 // vrgatherei16 if the corresponding index type for plain vrgather is greater
3626 // than XLenVT.
3627 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
3628 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
3629 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
3630 }
3631
3632 MVT IndexContainerVT =
3633 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
3634
3635 SDValue Gather;
3636 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
3637 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
3638 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
3639 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
3640 Subtarget);
3641 } else {
3642 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
3643 // If only one index is used, we can use a "splat" vrgather.
3644 // TODO: We can splat the most-common index and fix-up any stragglers, if
3645 // that's beneficial.
3646 if (LHSIndexCounts.size() == 1) {
3647 int SplatIndex = LHSIndexCounts.begin()->getFirst();
3648 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
3649 DAG.getConstant(SplatIndex, DL, XLenVT),
3650 DAG.getUNDEF(ContainerVT), TrueMask, VL);
3651 } else {
3652 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
3653 LHSIndices =
3654 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
3655
3656 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
3657 DAG.getUNDEF(ContainerVT), TrueMask, VL);
3658 }
3659 }
3660
3661 // If a second vector operand is used by this shuffle, blend it in with an
3662 // additional vrgather.
3663 if (!V2.isUndef()) {
3664 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
3665
3666 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3667 SelectMask =
3668 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
3669
3670 // If only one index is used, we can use a "splat" vrgather.
3671 // TODO: We can splat the most-common index and fix-up any stragglers, if
3672 // that's beneficial.
3673 if (RHSIndexCounts.size() == 1) {
3674 int SplatIndex = RHSIndexCounts.begin()->getFirst();
3675 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
3676 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
3677 SelectMask, VL);
3678 } else {
3679 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
3680 RHSIndices =
3681 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
3682 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
3683 SelectMask, VL);
3684 }
3685 }
3686
3687 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3688}
3689
3690bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
3691 // Support splats for any type. These should type legalize well.
3692 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
3693 return true;
3694
3695 // Only support legal VTs for other shuffles for now.
3696 if (!isTypeLegal(VT))
3697 return false;
3698
3699 MVT SVT = VT.getSimpleVT();
3700
3701 int Dummy1, Dummy2;
3702 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
3703 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
3704}
3705
3706// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
3707// the exponent.
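// For example, a v8i16 CTLZ lane holding the value 8 is converted to 8.0f,
// whose biased f32 exponent field is 130; (127 + 15) - 130 = 12, the number
// of leading zeros in the i16 value 8.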
3708SDValue
3709RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
3710 SelectionDAG &DAG) const {
3711 MVT VT = Op.getSimpleValueType();
3712 unsigned EltSize = VT.getScalarSizeInBits();
3713 SDValue Src = Op.getOperand(0);
3714 SDLoc DL(Op);
3715
3716 // We choose an FP type that can represent the value exactly if possible.
3717 // Otherwise, we use a round-towards-zero conversion so the exponent of the result is correct.
3718 // TODO: Use f16 for i8 when possible?
3719 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
3720 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
3721 FloatEltVT = MVT::f32;
3722 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
3723
3724 // Legal types should have been checked in the RISCVTargetLowering
3725 // constructor.
3726 // TODO: Splitting may make sense in some cases.
3727 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
3728 "Expected legal float type!");
3729
3730 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
3731 // The trailing zero count is equal to log2 of this single bit value.
3732 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
3733 SDValue Neg = DAG.getNegative(Src, DL, VT);
3734 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
3735 }
3736
3737 // We have a legal FP type, convert to it.
3738 SDValue FloatVal;
3739 if (FloatVT.bitsGT(VT)) {
3740 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
3741 } else {
3742 // Use RTZ so that rounding cannot influence the exponent of FloatVal.
3743 MVT ContainerVT = VT;
3744 if (VT.isFixedLengthVector()) {
3745 ContainerVT = getContainerForFixedLengthVector(VT);
3746 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3747 }
3748
3749 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3750 SDValue RTZRM =
3751 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
3752 MVT ContainerFloatVT =
3753 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
3754 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
3755 Src, Mask, RTZRM, VL);
3756 if (VT.isFixedLengthVector())
3757 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
3758 }
3759 // Bitcast to integer and shift the exponent to the LSB.
3760 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
3761 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
3762 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
3763 SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
3764 DAG.getConstant(ShiftAmt, DL, IntVT));
3765 // Restore the original type. The truncate after the SRL is intended to form vnsrl.
3766 if (IntVT.bitsLT(VT))
3767 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
3768 else if (IntVT.bitsGT(VT))
3769 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
3770 // The exponent contains log2 of the value in biased form.
3771 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
3772
3773 // For trailing zeros, we just need to subtract the bias.
3774 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
3775 return DAG.getNode(ISD::SUB, DL, VT, Exp,
3776 DAG.getConstant(ExponentBias, DL, VT));
3777
3778 // For leading zeros, we need to remove the bias and convert from log2 to
3779 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
3780 unsigned Adjust = ExponentBias + (EltSize - 1);
3781 SDValue Res =
3782 DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
3783 // For a zero input, the result above equals Adjust, which is greater than
3784 // EltSize. Hence, we can take min(Res, EltSize) to implement CTLZ.
3785 if (Op.getOpcode() == ISD::CTLZ)
3786 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
3787 return Res;
3788}
3789
3790// While RVV has alignment restrictions, we should always be able to load as a
3791// legal equivalently-sized byte-typed vector instead. This method is
3792// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
3793// the load is already correctly aligned, it returns SDValue().
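// For example, an underaligned load of <4 x i32> can be re-expressed as a
// load of <16 x i8> with the original alignment, followed by a bitcast back
// to <4 x i32>.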
3794SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
3795 SelectionDAG &DAG) const {
3796 auto *Load = cast<LoadSDNode>(Op);
3797 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
3798
3799 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
3800 Load->getMemoryVT(),
3801 *Load->getMemOperand()))
3802 return SDValue();
3803
3804 SDLoc DL(Op);
3805 MVT VT = Op.getSimpleValueType();
3806 unsigned EltSizeBits = VT.getScalarSizeInBits();
3807 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3808 "Unexpected unaligned RVV load type");
3809 MVT NewVT =
3810 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3811 assert(NewVT.isValid() &&
3812 "Expecting equally-sized RVV vector types to be legal");
3813 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
3814 Load->getPointerInfo(), Load->getOriginalAlign(),
3815 Load->getMemOperand()->getFlags());
3816 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
3817}
3818
3819// While RVV has alignment restrictions, we should always be able to store as a
3820// legal equivalently-sized byte-typed vector instead. This method is
3821// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
3822// returns SDValue() if the store is already correctly aligned.
3823SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
3824 SelectionDAG &DAG) const {
3825 auto *Store = cast<StoreSDNode>(Op);
3826 assert(Store && Store->getValue().getValueType().isVector() &&
3827 "Expected vector store");
3828
3829 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
3830 Store->getMemoryVT(),
3831 *Store->getMemOperand()))
3832 return SDValue();
3833
3834 SDLoc DL(Op);
3835 SDValue StoredVal = Store->getValue();
3836 MVT VT = StoredVal.getSimpleValueType();
3837 unsigned EltSizeBits = VT.getScalarSizeInBits();
3838 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3839 "Unexpected unaligned RVV store type");
3840 MVT NewVT =
3841 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3842 assert(NewVT.isValid() &&
3843 "Expecting equally-sized RVV vector types to be legal");
3844 StoredVal = DAG.getBitcast(NewVT, StoredVal);
3845 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
3846 Store->getPointerInfo(), Store->getOriginalAlign(),
3847 Store->getMemOperand()->getFlags());
3848}
3849
3850static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
3851 const RISCVSubtarget &Subtarget) {
3852 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
3853
3854 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
3855
3856 // All simm32 constants should be handled by isel.
3857 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2,
3858 // making this check redundant; but small immediates are common, so checking
3859 // here first gives better compile time.
3860 if (isInt<32>(Imm))
3861 return Op;
3862
3863 // We only need to cost the immediate if constant pool lowering is enabled.
3864 if (!Subtarget.useConstantPoolForLargeInts())
3865 return Op;
3866
3867 RISCVMatInt::InstSeq Seq =
3868 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
3869 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
3870 return Op;
3871
3872 // Expand to a constant pool using the default expansion code.
3873 return SDValue();
3874}
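// For example, an arbitrary 64-bit immediate such as 0x1234567890abcdef needs
// a multi-instruction materialization sequence; if that sequence is longer
// than getMaxBuildIntsCost() allows, returning SDValue() here lets the
// generic expansion emit a constant-pool load instead.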
3875
3876static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3877 const RISCVSubtarget &Subtarget) {
3878 SDLoc dl(Op);
3879 AtomicOrdering FenceOrdering =
3880 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
3881 SyncScope::ID FenceSSID =
3882 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3883
3884 if (Subtarget.hasStdExtZtso()) {
3885 // The only fence that needs an instruction is a sequentially-consistent
3886 // cross-thread fence.
3887 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3888 FenceSSID == SyncScope::System)
3889 return Op;
3890
3891 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3892 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
3893 }
3894
3895 // singlethread fences only synchronize with signal handlers on the same
3896 // thread and thus only need to preserve instruction order, not actually
3897 // enforce memory ordering.
3898 if (FenceSSID == SyncScope::SingleThread)
3899 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3900 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
3901
3902 return Op;
3903}
3904
3905SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
3906 SelectionDAG &DAG) const {
3907 switch (Op.getOpcode()) {
3908 default:
3909 report_fatal_error("unimplemented operand");
3910 case ISD::ATOMIC_FENCE:
3911 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
3912 case ISD::GlobalAddress:
3913 return lowerGlobalAddress(Op, DAG);
3914 case ISD::BlockAddress:
3915 return lowerBlockAddress(Op, DAG);
3916 case ISD::ConstantPool:
3917 return lowerConstantPool(Op, DAG);
3918 case ISD::JumpTable:
3919 return lowerJumpTable(Op, DAG);
3920 case ISD::GlobalTLSAddress:
3921 return lowerGlobalTLSAddress(Op, DAG);
3922 case ISD::Constant:
3923 return lowerConstant(Op, DAG, Subtarget);
3924 case ISD::SELECT:
3925 return lowerSELECT(Op, DAG);
3926 case ISD::BRCOND:
3927 return lowerBRCOND(Op, DAG);
3928 case ISD::VASTART:
3929 return lowerVASTART(Op, DAG);
3930 case ISD::FRAMEADDR:
3931 return lowerFRAMEADDR(Op, DAG);
3932 case ISD::RETURNADDR:
3933 return lowerRETURNADDR(Op, DAG);
3934 case ISD::SHL_PARTS:
3935 return lowerShiftLeftParts(Op, DAG);
3936 case ISD::SRA_PARTS:
3937 return lowerShiftRightParts(Op, DAG, true);
3938 case ISD::SRL_PARTS:
3939 return lowerShiftRightParts(Op, DAG, false);
3940 case ISD::BITCAST: {
3941 SDLoc DL(Op);
3942 EVT VT = Op.getValueType();
3943 SDValue Op0 = Op.getOperand(0);
3944 EVT Op0VT = Op0.getValueType();
3945 MVT XLenVT = Subtarget.getXLenVT();
3946 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
3947 Subtarget.hasStdExtZfhOrZfhmin()) {
3948 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3949 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3950 return FPConv;
3951 }
3952 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3953 Subtarget.hasStdExtF()) {
3954 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3955 SDValue FPConv =
3956 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3957 return FPConv;
3958 }
3959 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
3960 Subtarget.hasStdExtZfa()) {
3961 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op0,
3962 DAG.getConstant(0, DL, MVT::i32));
3963 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op0,
3964 DAG.getConstant(1, DL, MVT::i32));
3965 SDValue RetReg =
3966 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3967 return RetReg;
3968 }
3969
3970 // Consider other scalar<->scalar casts as legal if the types are legal.
3971 // Otherwise expand them.
3972 if (!VT.isVector() && !Op0VT.isVector()) {
3973 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
3974 return Op;
3975 return SDValue();
3976 }
3977
3978 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
3979 "Unexpected types");
3980
3981 if (VT.isFixedLengthVector()) {
3982 // We can handle fixed length vector bitcasts with a simple replacement
3983 // in isel.
3984 if (Op0VT.isFixedLengthVector())
3985 return Op;
3986 // When bitcasting from scalar to fixed-length vector, insert the scalar
3987 // into a one-element vector of the result type, and perform a vector
3988 // bitcast.
3989 if (!Op0VT.isVector()) {
3990 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
3991 if (!isTypeLegal(BVT))
3992 return SDValue();
3993 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
3994 DAG.getUNDEF(BVT), Op0,
3995 DAG.getConstant(0, DL, XLenVT)));
3996 }
3997 return SDValue();
3998 }
3999 // Custom-legalize bitcasts from fixed-length vector types to scalar types
4000 // as follows: bitcast the vector to a one-element vector type whose element
4001 // type is the same as the result type, and extract the first element.
4002 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
4003 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
4004 if (!isTypeLegal(BVT))
4005 return SDValue();
4006 SDValue BVec = DAG.getBitcast(BVT, Op0);
4007 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4008 DAG.getConstant(0, DL, XLenVT));
4009 }
4010 return SDValue();
4011 }
4012 case ISD::INTRINSIC_WO_CHAIN:
4013 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4014 case ISD::INTRINSIC_W_CHAIN:
4015 return LowerINTRINSIC_W_CHAIN(Op, DAG);
4016 case ISD::INTRINSIC_VOID:
4017 return LowerINTRINSIC_VOID(Op, DAG);
4018 case ISD::BITREVERSE: {
4019 MVT VT = Op.getSimpleValueType();
4020 SDLoc DL(Op);
4021 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
4022 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
4023 // Expand bitreverse to a bswap(rev8) followed by brev8.
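// For example, bitreverse(0x000000F0 : i32) becomes rev8 -> 0xF0000000,
// then brev8 reverses the bits within each byte -> 0x0F000000.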
4024 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
4025 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
4026 }
4027 case ISD::TRUNCATE:
4028 // Only custom-lower vector truncates
4029 if (!Op.getSimpleValueType().isVector())
4030 return Op;
4031 return lowerVectorTruncLike(Op, DAG);
4032 case ISD::ANY_EXTEND:
4033 case ISD::ZERO_EXTEND:
4034 if (Op.getOperand(0).getValueType().isVector() &&
4035 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4036 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
4037 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
4038 case ISD::SIGN_EXTEND:
4039 if (Op.getOperand(0).getValueType().isVector() &&
4040 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4041 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
4042 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
4043 case ISD::SPLAT_VECTOR_PARTS:
4044 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
4045 case ISD::INSERT_VECTOR_ELT:
4046 return lowerINSERT_VECTOR_ELT(Op, DAG);
4047 case ISD::EXTRACT_VECTOR_ELT:
4048 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4049 case ISD::VSCALE: {
4050 MVT VT = Op.getSimpleValueType();
4051 SDLoc DL(Op);
4052 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
4053 // We define our scalable vector types for lmul=1 to use a 64-bit known
4054 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
4055 // vscale as VLENB / 8.
4056 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
4057 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
4058 report_fatal_error("Support for VLEN==32 is incomplete.");
4059 // We assume VLENB is a multiple of 8. We manually choose the best shift
4060 // here because SimplifyDemandedBits isn't always able to simplify it.
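// For example, vscale * 4 becomes VLENB >> 1, since vscale == VLENB / 8 and
// (VLENB / 8) * 4 == VLENB / 2.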
4061 uint64_t Val = Op.getConstantOperandVal(0);
4062 if (isPowerOf2_64(Val)) {
4063 uint64_t Log2 = Log2_64(Val);
4064 if (Log2 < 3)
4065 return DAG.getNode(ISD::SRL, DL, VT, VLENB,
4066 DAG.getConstant(3 - Log2, DL, VT));
4067 if (Log2 > 3)
4068 return DAG.getNode(ISD::SHL, DL, VT, VLENB,
4069 DAG.getConstant(Log2 - 3, DL, VT));
4070 return VLENB;
4071 }
4072 // If the multiplier is a multiple of 8, scale it down to avoid needing
4073 // to shift the VLENB value.
4074 if ((Val % 8) == 0)
4075 return DAG.getNode(ISD::MUL, DL, VT, VLENB,
4076 DAG.getConstant(Val / 8, DL, VT));
4077
4078 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
4079 DAG.getConstant(3, DL, VT));
4080 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
4081 }
4082 case ISD::FPOWI: {
4083 // Custom-promote f16 powi with an illegal i32 integer type on RV64. Once
4084 // promoted, this will be legalized into a libcall by LegalizeIntegerTypes.
4085 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
4086 Op.getOperand(1).getValueType() == MVT::i32) {
4087 SDLoc DL(Op);
4088 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
4089 SDValue Powi =
4090 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
4091 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
4092 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
4093 }
4094 return SDValue();
4095 }
4096 case ISD::FP_EXTEND:
4097 case ISD::FP_ROUND:
4098 if (!Op.getValueType().isVector())
4099 return Op;
4100 return lowerVectorFPExtendOrRoundLike(Op, DAG);
4101 case ISD::STRICT_FP_EXTEND:
4102 return lowerStrictFPExtend(Op, DAG);
4103 case ISD::FP_TO_SINT:
4104 case ISD::FP_TO_UINT:
4105 case ISD::SINT_TO_FP:
4106 case ISD::UINT_TO_FP: {
4107 // RVV can only do fp<->int conversions to types half or double the size of
4108 // the source. We custom-lower any conversion that needs two hops into a
4109 // sequence of single-hop conversions.
4110 MVT VT = Op.getSimpleValueType();
4111 if (!VT.isVector())
4112 return Op;
4113 SDLoc DL(Op);
4114 SDValue Src = Op.getOperand(0);
4115 MVT EltVT = VT.getVectorElementType();
4116 MVT SrcVT = Src.getSimpleValueType();
4117 MVT SrcEltVT = SrcVT.getVectorElementType();
4118 unsigned EltSize = EltVT.getSizeInBits();
4119 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
4120 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
4121 "Unexpected vector element types");
4122
4123 bool IsInt2FP = SrcEltVT.isInteger();
4124 // Widening conversions
4125 if (EltSize > (2 * SrcEltSize)) {
4126 if (IsInt2FP) {
4127 // Do a regular integer sign/zero extension then convert to float.
4128 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
4129 VT.getVectorElementCount());
4130 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
4131 ? ISD::ZERO_EXTEND
4132 : ISD::SIGN_EXTEND;
4133 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
4134 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
4135 }
4136 // FP2Int
4137 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
4138 // Do one doubling fp_extend then complete the operation by converting
4139 // to int.
4140 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
4141 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
4142 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
4143 }
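// For example, a v4i8 -> v4f32 uint_to_fp is widened in two hops: a
// zero_extend to v4i16 followed by a v4i16 -> v4f32 conversion.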
4144
4145 // Narrowing conversions
4146 if (SrcEltSize > (2 * EltSize)) {
4147 if (IsInt2FP) {
4148 // One narrowing int_to_fp, then an fp_round.
4149 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
4150 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
4151 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
4152 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
4153 }
4154 // FP2Int
4155 // One narrowing fp_to_int, then truncate the integer. If the float isn't
4156 // representable by the integer, the result is poison.
4157 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
4158 VT.getVectorElementCount());
4159 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
4160 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
4161 }
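// For example, a v4f64 -> v4i16 fp_to_sint is narrowed in two hops: a
// v4f64 -> v4i32 conversion followed by a truncate to v4i16.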
4162
4163 // Scalable vectors can exit here. Patterns will handle equally-sized
4164 // conversions as well as halving/doubling ones.
4165 if (!VT.isFixedLengthVector())
4166 return Op;
4167
4168 // For fixed-length vectors we lower to a custom "VL" node.
4169 unsigned RVVOpc = 0;
4170 switch (Op.getOpcode()) {
4171 default:
4172 llvm_unreachable("Impossible opcode");