1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
31 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
37 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/KnownBits.h"
42 #include <optional>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "riscv-lower"
47 
48 STATISTIC(NumTailCalls, "Number of tail calls");
49 
51  DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
52  cl::desc("Give the maximum size (in number of nodes) of the web of "
53  "instructions that we will consider for VW expansion"),
54  cl::init(18));
55 
56 static cl::opt<bool>
57  AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
58  cl::desc("Allow the formation of VW_W operations (e.g., "
59  "VWADD_W) with splat constants"),
60  cl::init(false));
61 
63  const RISCVSubtarget &STI)
64  : TargetLowering(TM), Subtarget(STI) {
65 
66  if (Subtarget.isRV32E())
67  report_fatal_error("Codegen not yet implemented for RV32E");
68 
69  RISCVABI::ABI ABI = Subtarget.getTargetABI();
70  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
71 
73  !Subtarget.hasStdExtF()) {
74  errs() << "Hard-float 'f' ABI can't be used for a target that "
75  "doesn't support the F instruction set extension (ignoring "
76  "target-abi)\n";
78  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
79  !Subtarget.hasStdExtD()) {
80  errs() << "Hard-float 'd' ABI can't be used for a target that "
81  "doesn't support the D instruction set extension (ignoring "
82  "target-abi)\n";
84  }
85 
86  switch (ABI) {
87  default:
88  report_fatal_error("Don't know how to lower this ABI");
92  case RISCVABI::ABI_LP64:
95  break;
96  }
97 
98  MVT XLenVT = Subtarget.getXLenVT();
99 
100  // Set up the register classes.
101  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
102 
103  if (Subtarget.hasStdExtZfh())
104  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
105  if (Subtarget.hasStdExtF())
106  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
107  if (Subtarget.hasStdExtD())
108  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
109 
110  static const MVT::SimpleValueType BoolVecVTs[] = {
113  static const MVT::SimpleValueType IntVecVTs[] = {
119  static const MVT::SimpleValueType F16VecVTs[] = {
122  static const MVT::SimpleValueType F32VecVTs[] = {
124  static const MVT::SimpleValueType F64VecVTs[] = {
126 
127  if (Subtarget.hasVInstructions()) {
128  auto addRegClassForRVV = [this](MVT VT) {
129  // Disable the smallest fractional LMUL types if ELEN is less than
130  // RVVBitsPerBlock.
131  unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
132  if (VT.getVectorMinNumElements() < MinElts)
133  return;
134 
135  unsigned Size = VT.getSizeInBits().getKnownMinValue();
136  const TargetRegisterClass *RC;
137  if (Size <= RISCV::RVVBitsPerBlock)
138  RC = &RISCV::VRRegClass;
139  else if (Size == 2 * RISCV::RVVBitsPerBlock)
140  RC = &RISCV::VRM2RegClass;
141  else if (Size == 4 * RISCV::RVVBitsPerBlock)
142  RC = &RISCV::VRM4RegClass;
143  else if (Size == 8 * RISCV::RVVBitsPerBlock)
144  RC = &RISCV::VRM8RegClass;
145  else
146  llvm_unreachable("Unexpected size");
147 
148  addRegisterClass(VT, RC);
149  };
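    // Illustrative examples (using the standard RVVBitsPerBlock of 64):
    // nxv2i32 has a known minimum size of 64 bits and lands in VR (LMUL=1),
    // nxv4i64 (256 bits minimum) lands in VRM4, and with ELEN=32 the
    // fractional type nxv1i8 is skipped because MinElts would be 2.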
150 
151  for (MVT VT : BoolVecVTs)
152  addRegClassForRVV(VT);
153  for (MVT VT : IntVecVTs) {
154  if (VT.getVectorElementType() == MVT::i64 &&
155  !Subtarget.hasVInstructionsI64())
156  continue;
157  addRegClassForRVV(VT);
158  }
159 
160  if (Subtarget.hasVInstructionsF16())
161  for (MVT VT : F16VecVTs)
162  addRegClassForRVV(VT);
163 
164  if (Subtarget.hasVInstructionsF32())
165  for (MVT VT : F32VecVTs)
166  addRegClassForRVV(VT);
167 
168  if (Subtarget.hasVInstructionsF64())
169  for (MVT VT : F64VecVTs)
170  addRegClassForRVV(VT);
171 
172  if (Subtarget.useRVVForFixedLengthVectors()) {
173  auto addRegClassForFixedVectors = [this](MVT VT) {
174  MVT ContainerVT = getContainerForFixedLengthVector(VT);
175  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
176  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
177  addRegisterClass(VT, TRI.getRegClass(RCID));
178  };
180  if (useRVVForFixedLengthVectorVT(VT))
181  addRegClassForFixedVectors(VT);
182 
184  if (useRVVForFixedLengthVectorVT(VT))
185  addRegClassForFixedVectors(VT);
186  }
187  }
188 
189  // Compute derived properties from the register classes.
191 
193 
195  MVT::i1, Promote);
196  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
198  MVT::i1, Promote);
199 
200  // TODO: add all necessary setOperationAction calls.
202 
207 
214 
216 
219 
221 
223 
224  if (!Subtarget.hasStdExtZbb())
226 
227  if (Subtarget.is64Bit()) {
229 
231 
233  MVT::i32, Custom);
234 
236  MVT::i32, Custom);
237  } else {
239  {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
240  nullptr);
241  setLibcallName(RTLIB::MULO_I64, nullptr);
242  }
243 
244  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
246  } else {
247  if (Subtarget.is64Bit()) {
249  } else {
251  }
252  }
253 
254  if (!Subtarget.hasStdExtM()) {
256  XLenVT, Expand);
257  } else {
258  if (Subtarget.is64Bit()) {
261  }
262  }
263 
266  Expand);
267 
269  Custom);
270 
271  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
272  if (Subtarget.is64Bit())
274  } else {
276  }
277 
278  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
279  // pattern match it directly in isel.
281  (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
282  ? Legal
283  : Expand);
284  // Zbkb can use rev8+brev8 to implement bitreverse.
286  Subtarget.hasStdExtZbkb() ? Custom : Expand);
287 
288  if (Subtarget.hasStdExtZbb()) {
290  Legal);
291 
292  if (Subtarget.is64Bit())
295  MVT::i32, Custom);
296  } else {
298  }
299 
300  if (Subtarget.is64Bit())
302 
304 
305  static const unsigned FPLegalNodeTypes[] = {
312 
313  static const ISD::CondCode FPCCToExpand[] = {
317 
318  static const unsigned FPOpToExpand[] = {
321 
322  if (Subtarget.hasStdExtZfh())
324 
325  if (Subtarget.hasStdExtZfh()) {
326  setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
335  setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
339 
343  MVT::f16, Promote);
344 
345  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
346  // complete support for all operations in LegalizeDAG.
351  MVT::f16, Promote);
352 
353  // We need to custom promote this.
354  if (Subtarget.is64Bit())
356  }
357 
358  if (Subtarget.hasStdExtF()) {
359  setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
366  setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
370  setOperationAction(FPOpToExpand, MVT::f32, Expand);
373  }
374 
375  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
377 
378  if (Subtarget.hasStdExtD()) {
379  setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
380  if (Subtarget.is64Bit()) {
387  }
390  setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
396  setOperationAction(FPOpToExpand, MVT::f64, Expand);
399  }
400 
401  if (Subtarget.is64Bit())
404  MVT::i32, Custom);
405 
406  if (Subtarget.hasStdExtF()) {
408  Custom);
409 
412  XLenVT, Legal);
413 
416  }
417 
420  XLenVT, Custom);
421 
423 
424  if (Subtarget.is64Bit())
426 
427  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
428  // Unfortunately this can't be determined just from the ISA naming string.
430  Subtarget.is64Bit() ? Legal : Custom);
431 
434  if (Subtarget.is64Bit())
436 
437  if (Subtarget.hasStdExtA()) {
440  } else if (Subtarget.hasForcedAtomics()) {
442  } else {
444  }
445 
447 
448  if (Subtarget.hasVInstructions()) {
450 
452 
453  // RVV intrinsics may have illegal operands.
454  // We also need to custom legalize vmv.x.s.
456  {MVT::i8, MVT::i16}, Custom);
457  if (Subtarget.is64Bit())
459  else
461  MVT::i64, Custom);
462 
464  MVT::Other, Custom);
465 
466  static const unsigned IntegerVPOps[] = {
467  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
468  ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
469  ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
470  ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
471  ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
472  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
473  ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
474  ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
475  ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
476  ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
477  ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX};
478 
479  static const unsigned FloatingPointVPOps[] = {
480  ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
481  ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
482  ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
483  ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
484  ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
485  ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
486  ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
487  ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
488  ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
489  ISD::VP_FRINT, ISD::VP_FNEARBYINT};
490 
491  static const unsigned IntegerVecReduceOps[] = {
495 
496  static const unsigned FloatingPointVecReduceOps[] = {
499 
500  if (!Subtarget.is64Bit()) {
501  // We must custom-lower certain vXi64 operations on RV32 due to the vector
502  // element type being illegal.
504  MVT::i64, Custom);
505 
506  setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
507 
508  setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
509  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
510  ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
511  ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
512  MVT::i64, Custom);
513  }
514 
515  for (MVT VT : BoolVecVTs) {
516  if (!isTypeLegal(VT))
517  continue;
518 
520 
521  // Mask VTs are custom-expanded into a series of standard nodes
524  VT, Custom);
525 
527  Custom);
528 
531  {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
532  Expand);
533 
534  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
535 
538  Custom);
539 
541  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
542  Custom);
543 
544  // RVV has native int->float & float->int conversions where the
545  // element type sizes are within one power-of-two of each other. Any
546  // wider distances between type sizes have to be lowered as sequences
547  // which progressively narrow the gap in stages.
550  VT, Custom);
552  Custom);
553 
554  // Expand all extending loads to types larger than this, and truncating
555  // stores from types larger than this.
556  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
557  setTruncStoreAction(OtherVT, VT, Expand);
559  VT, Expand);
560  }
561 
562  setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
563  ISD::VP_TRUNCATE, ISD::VP_SETCC},
564  VT, Custom);
566 
568  ISD::VECTOR_SPLICE, VT,
569  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
570  }
571 
572  for (MVT VT : IntVecVTs) {
573  if (!isTypeLegal(VT))
574  continue;
575 
578 
579  // Vectors implement MULHS/MULHU.
581 
582  // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
583  if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
585 
587  Legal);
588 
590 
592 
594  setOperationAction(ISD::VP_BSWAP, VT, Expand);
595 
596  // Custom-lower extensions and truncations from/to mask types.
598  VT, Custom);
599 
600  // RVV has native int->float & float->int conversions where the
601  // element type sizes are within one power-of-two of each other. Any
602  // wider distances between type sizes have to be lowered as sequences
603  // which progressively narrow the gap in stages.
606  VT, Custom);
608  Custom);
609 
612 
613  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
614  // nodes which truncate by one power of two at a time.
616 
617  // Custom-lower insert/extract operations to simplify patterns.
619  Custom);
620 
621  // Custom-lower reduction operations to set up the corresponding custom
622  // nodes' operands.
623  setOperationAction(IntegerVecReduceOps, VT, Custom);
624 
625  setOperationAction(IntegerVPOps, VT, Custom);
626 
628 
630  VT, Custom);
631 
633  {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
634  ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
635  VT, Custom);
636 
639  VT, Custom);
640 
643 
645 
646  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
647  setTruncStoreAction(VT, OtherVT, Expand);
649  VT, Expand);
650  }
651 
652  // Splice
654 
655  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
656  // type that can represent the value exactly.
657  if (VT.getVectorElementType() != MVT::i64) {
658  MVT FloatEltVT =
660  EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
661  if (isTypeLegal(FloatVT)) {
663  Custom);
664  }
665  }
666  }
667 
668  // Expand various CCs to best match the RVV ISA, which natively supports UNE
669  // but no other unordered comparisons, and supports all ordered comparisons
670  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
671  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
672  // and we pattern-match those back to the "original", swapping operands once
673  // more. This way we catch both operations and both "vf" and "fv" forms with
674  // fewer patterns.
675  static const ISD::CondCode VFPCCToExpand[] = {
679  };
680 
681  // Sets common operation actions on RVV floating-point vector types.
682  const auto SetCommonVFPActions = [&](MVT VT) {
684  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
685  // sizes are within one power-of-two of each other. Therefore conversions
686  // between vXf16 and vXf64 must be lowered as sequences which convert via
687  // vXf32.
689  // Custom-lower insert/extract operations to simplify patterns.
691  Custom);
692  // Expand various condition codes (explained above).
693  setCondCodeAction(VFPCCToExpand, VT, Expand);
694 
696 
699  VT, Custom);
700 
701  setOperationAction(FloatingPointVecReduceOps, VT, Custom);
702 
703  // Expand FP operations that need libcalls.
716 
718 
720 
722  VT, Custom);
723 
725  {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
726  ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
727  VT, Custom);
728 
731 
734  VT, Custom);
735 
737 
738  setOperationAction(FloatingPointVPOps, VT, Custom);
739  };
740 
741  // Sets common extload/truncstore actions on RVV floating-point vector
742  // types.
743  const auto SetCommonVFPExtLoadTruncStoreActions =
744  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
745  for (auto SmallVT : SmallerVTs) {
746  setTruncStoreAction(VT, SmallVT, Expand);
747  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
748  }
749  };
750 
751  if (Subtarget.hasVInstructionsF16()) {
752  for (MVT VT : F16VecVTs) {
753  if (!isTypeLegal(VT))
754  continue;
755  SetCommonVFPActions(VT);
756  }
757  }
758 
759  if (Subtarget.hasVInstructionsF32()) {
760  for (MVT VT : F32VecVTs) {
761  if (!isTypeLegal(VT))
762  continue;
763  SetCommonVFPActions(VT);
764  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
765  }
766  }
767 
768  if (Subtarget.hasVInstructionsF64()) {
769  for (MVT VT : F64VecVTs) {
770  if (!isTypeLegal(VT))
771  continue;
772  SetCommonVFPActions(VT);
773  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
774  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
775  }
776  }
777 
778  if (Subtarget.useRVVForFixedLengthVectors()) {
780  if (!useRVVForFixedLengthVectorVT(VT))
781  continue;
782 
783  // By default everything must be expanded.
784  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
786  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
787  setTruncStoreAction(VT, OtherVT, Expand);
789  OtherVT, VT, Expand);
790  }
791 
792  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
794  Custom);
795 
797  Custom);
798 
800  VT, Custom);
801 
803 
805 
807 
809 
811 
814  Custom);
815 
817  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
818  Custom);
819 
822  VT, Custom);
824  Custom);
825 
 826  // Operations below differ between mask vectors and other vectors.
827  if (VT.getVectorElementType() == MVT::i1) {
828  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
829  ISD::OR, ISD::XOR},
830  VT, Custom);
831 
832  setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
833  ISD::VP_SETCC, ISD::VP_TRUNCATE},
834  VT, Custom);
835  continue;
836  }
837 
838  // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
839  // it before type legalization for i64 vectors on RV32. It will then be
840  // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
841  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
842  // improvements first.
843  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
846  }
847 
850 
853 
854  setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
855  ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
856  ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
857  ISD::VP_SCATTER},
858  VT, Custom);
859 
863  VT, Custom);
864 
867 
868  // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
869  if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
871 
874  Custom);
875 
878 
881 
882  // Custom-lower reduction operations to set up the corresponding custom
883  // nodes' operands.
887  VT, Custom);
888 
889  setOperationAction(IntegerVPOps, VT, Custom);
890 
891  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
892  // type that can represent the value exactly.
893  if (VT.getVectorElementType() != MVT::i64) {
894  MVT FloatEltVT =
896  EVT FloatVT =
897  MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
898  if (isTypeLegal(FloatVT))
900  Custom);
901  }
902  }
903 
905  if (!useRVVForFixedLengthVectorVT(VT))
906  continue;
907 
908  // By default everything must be expanded.
909  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
911  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
912  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
913  setTruncStoreAction(VT, OtherVT, Expand);
914  }
915 
916  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
918  Custom);
919 
923  VT, Custom);
924 
927  VT, Custom);
928 
929  setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
930  ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
931  ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
932  ISD::VP_SCATTER},
933  VT, Custom);
934 
938  VT, Custom);
939 
941 
944  VT, Custom);
945 
946  setCondCodeAction(VFPCCToExpand, VT, Expand);
947 
950 
952 
953  setOperationAction(FloatingPointVecReduceOps, VT, Custom);
954 
955  setOperationAction(FloatingPointVPOps, VT, Custom);
956  }
957 
958  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
960  Custom);
961  if (Subtarget.hasStdExtZfh())
963  if (Subtarget.hasStdExtF())
965  if (Subtarget.hasStdExtD())
967  }
968  }
969 
970  if (Subtarget.hasForcedAtomics()) {
971  // Set atomic rmw/cas operations to expand to force __sync libcalls.
977  XLenVT, Expand);
978  }
979 
980  // Function alignments.
981  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
982  setMinFunctionAlignment(FunctionAlignment);
983  setPrefFunctionAlignment(FunctionAlignment);
984 
986 
987  // Jumps are expensive, compared to logic
989 
992  if (Subtarget.is64Bit())
994 
995  if (Subtarget.hasStdExtF())
997 
998  if (Subtarget.hasStdExtZbb())
1000 
1001  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1003 
1004  if (Subtarget.hasStdExtZbkb())
1006  if (Subtarget.hasStdExtZfh())
1008  if (Subtarget.hasStdExtF())
1011  if (Subtarget.hasVInstructions())
1013  ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1015  if (Subtarget.useRVVForFixedLengthVectors())
1017 
1018  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1019  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1020 }
1021 
1024  EVT VT) const {
1025  if (!VT.isVector())
1026  return getPointerTy(DL);
1027  if (Subtarget.hasVInstructions() &&
1028  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1031 }
1032 
1033 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1034  return Subtarget.getXLenVT();
1035 }
1036 
1038  const CallInst &I,
1039  MachineFunction &MF,
1040  unsigned Intrinsic) const {
1041  auto &DL = I.getModule()->getDataLayout();
1042  switch (Intrinsic) {
1043  default:
1044  return false;
1045  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1046  case Intrinsic::riscv_masked_atomicrmw_add_i32:
1047  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1048  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1049  case Intrinsic::riscv_masked_atomicrmw_max_i32:
1050  case Intrinsic::riscv_masked_atomicrmw_min_i32:
1051  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1052  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1053  case Intrinsic::riscv_masked_cmpxchg_i32:
1055  Info.memVT = MVT::i32;
1056  Info.ptrVal = I.getArgOperand(0);
1057  Info.offset = 0;
1058  Info.align = Align(4);
1061  return true;
1062  case Intrinsic::riscv_masked_strided_load:
1064  Info.ptrVal = I.getArgOperand(1);
1065  Info.memVT = getValueType(DL, I.getType()->getScalarType());
1066  Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1069  return true;
1070  case Intrinsic::riscv_masked_strided_store:
1071  Info.opc = ISD::INTRINSIC_VOID;
1072  Info.ptrVal = I.getArgOperand(1);
1073  Info.memVT =
1074  getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1075  Info.align = Align(
1076  DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1077  8);
1080  return true;
1081  case Intrinsic::riscv_seg2_load:
1082  case Intrinsic::riscv_seg3_load:
1083  case Intrinsic::riscv_seg4_load:
1084  case Intrinsic::riscv_seg5_load:
1085  case Intrinsic::riscv_seg6_load:
1086  case Intrinsic::riscv_seg7_load:
1087  case Intrinsic::riscv_seg8_load:
1089  Info.ptrVal = I.getArgOperand(0);
1090  Info.memVT =
1091  getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
1092  Info.align =
1093  Align(DL.getTypeSizeInBits(
1094  I.getType()->getStructElementType(0)->getScalarType()) /
1095  8);
1098  return true;
1099  }
1100 }
1101 
1103  const AddrMode &AM, Type *Ty,
1104  unsigned AS,
1105  Instruction *I) const {
1106  // No global is ever allowed as a base.
1107  if (AM.BaseGV)
1108  return false;
1109 
1110  // RVV instructions only support register addressing.
1111  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1112  return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1113 
1114  // Require a 12-bit signed offset.
1115  if (!isInt<12>(AM.BaseOffs))
1116  return false;
1117 
1118  switch (AM.Scale) {
1119  case 0: // "r+i" or just "i", depending on HasBaseReg.
1120  break;
1121  case 1:
1122  if (!AM.HasBaseReg) // allow "r+i".
1123  break;
1124  return false; // disallow "r+r" or "r+r+i".
1125  default:
1126  return false;
1127  }
1128 
1129  return true;
1130 }
1131 
1133  return isInt<12>(Imm);
1134 }
1135 
1137  return isInt<12>(Imm);
1138 }
1139 
1140 // On RV32, 64-bit integers are split into their high and low parts and held
1141 // in two different registers, so the trunc is free since the low register can
1142 // just be used.
1143 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1144 // isTruncateFree?
1145 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1146  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1147  return false;
1148  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1149  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1150  return (SrcBits == 64 && DestBits == 32);
1151 }
1152 
1153 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1154  // We consider i64->i32 free on RV64 since we have good selection of W
1155  // instructions that make promoting operations back to i64 free in many cases.
1156  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1157  !DstVT.isInteger())
1158  return false;
1159  unsigned SrcBits = SrcVT.getSizeInBits();
1160  unsigned DestBits = DstVT.getSizeInBits();
1161  return (SrcBits == 64 && DestBits == 32);
1162 }
1163 
1165  // Zexts are free if they can be combined with a load.
1166  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1167  // poorly with type legalization of compares preferring sext.
1168  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1169  EVT MemVT = LD->getMemoryVT();
1170  if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1171  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1172  LD->getExtensionType() == ISD::ZEXTLOAD))
1173  return true;
1174  }
1175 
1176  return TargetLowering::isZExtFree(Val, VT2);
1177 }
1178 
1180  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1181 }
1182 
1184  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1185 }
1186 
1188  return Subtarget.hasStdExtZbb();
1189 }
1190 
1192  return Subtarget.hasStdExtZbb();
1193 }
1194 
1196  const Instruction &AndI) const {
1197  // We expect to be able to match a bit extraction instruction if the Zbs
1198  // extension is supported and the mask is a power of two. However, we
1199  // conservatively return false if the mask would fit in an ANDI instruction,
1200  // on the basis that it's possible the sinking+duplication of the AND in
1201  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1202  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1203  if (!Subtarget.hasStdExtZbs())
1204  return false;
1205  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1206  if (!Mask)
1207  return false;
1208  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1209 }
1210 
1212  EVT VT = Y.getValueType();
1213 
1214  // FIXME: Support vectors once we have tests.
1215  if (VT.isVector())
1216  return false;
1217 
1218  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1219  !isa<ConstantSDNode>(Y);
1220 }
1221 
1223  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1224  if (Subtarget.hasStdExtZbs())
1225  return X.getValueType().isScalarInteger();
1226  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1227  auto *C = dyn_cast<ConstantSDNode>(Y);
1228  return C && C->getAPIntValue().ule(10);
1229 }
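// For example (illustrative): testing bit 3 of X can be done with
// "andi t0, x, 8; snez t0, t0", since (1 << 3) fits in ANDI's 12-bit signed
// immediate, whereas testing bit 20 needs Zbs ("bexti t0, x, 20") because the
// mask 0x100000 no longer fits; this is why plain ANDI is only accepted for
// bit positions 0 through 10 above.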
1230 
1232  Type *Ty) const {
1233  assert(Ty->isIntegerTy());
1234 
1235  unsigned BitSize = Ty->getIntegerBitWidth();
1236  if (BitSize > Subtarget.getXLen())
1237  return false;
1238 
1239  // Fast path, assume 32-bit immediates are cheap.
1240  int64_t Val = Imm.getSExtValue();
1241  if (isInt<32>(Val))
1242  return true;
1243 
 1244  // A constant pool entry may be more aligned than the load we're trying to
1245  // replace. If we don't support unaligned scalar mem, prefer the constant
1246  // pool.
1247  // TODO: Can the caller pass down the alignment?
1248  if (!Subtarget.enableUnalignedScalarMem())
1249  return true;
1250 
1251  // Prefer to keep the load if it would require many instructions.
1252  // This uses the same threshold we use for constant pools but doesn't
1253  // check useConstantPoolForLargeInts.
1254  // TODO: Should we keep the load only when we're definitely going to emit a
1255  // constant pool?
1256 
1257  RISCVMatInt::InstSeq Seq =
1258  RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1259  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1260 }
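// Illustrative behaviour: any value that fits in 32 bits (e.g. 0x12345678) is
// treated as cheap to rematerialize, so the load is folded away. For wider
// constants on targets with fast unaligned scalar memory, the load is kept
// unless RISCVMatInt can build the value in at most getMaxBuildIntsCost()
// instructions.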
1261 
1265  unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1266  SelectionDAG &DAG) const {
1267  // One interesting pattern that we'd want to form is 'bit extract':
1268  // ((1 >> Y) & 1) ==/!= 0
1269  // But we also need to be careful not to try to reverse that fold.
1270 
1271  // Is this '((1 >> Y) & 1)'?
1272  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1273  return false; // Keep the 'bit extract' pattern.
1274 
1275  // Will this be '((1 >> Y) & 1)' after the transform?
1276  if (NewShiftOpcode == ISD::SRL && CC->isOne())
1277  return true; // Do form the 'bit extract' pattern.
1278 
1279  // If 'X' is a constant, and we transform, then we will immediately
1280  // try to undo the fold, thus causing endless combine loop.
1281  // So only do the transform if X is not a constant. This matches the default
1282  // implementation of this function.
1283  return !XC;
1284 }
1285 
1286 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1287  switch (Opcode) {
1288  case Instruction::Add:
1289  case Instruction::Sub:
1290  case Instruction::Mul:
1291  case Instruction::And:
1292  case Instruction::Or:
1293  case Instruction::Xor:
1294  case Instruction::FAdd:
1295  case Instruction::FSub:
1296  case Instruction::FMul:
1297  case Instruction::FDiv:
1298  case Instruction::ICmp:
1299  case Instruction::FCmp:
1300  return true;
1301  case Instruction::Shl:
1302  case Instruction::LShr:
1303  case Instruction::AShr:
1304  case Instruction::UDiv:
1305  case Instruction::SDiv:
1306  case Instruction::URem:
1307  case Instruction::SRem:
1308  return Operand == 1;
1309  default:
1310  return false;
1311  }
1312 }
1313 
1314 
1316  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1317  return false;
1318 
1319  if (canSplatOperand(I->getOpcode(), Operand))
1320  return true;
1321 
1322  auto *II = dyn_cast<IntrinsicInst>(I);
1323  if (!II)
1324  return false;
1325 
1326  switch (II->getIntrinsicID()) {
1327  case Intrinsic::fma:
1328  case Intrinsic::vp_fma:
1329  return Operand == 0 || Operand == 1;
1330  case Intrinsic::vp_shl:
1331  case Intrinsic::vp_lshr:
1332  case Intrinsic::vp_ashr:
1333  case Intrinsic::vp_udiv:
1334  case Intrinsic::vp_sdiv:
1335  case Intrinsic::vp_urem:
1336  case Intrinsic::vp_srem:
1337  return Operand == 1;
1338  // These intrinsics are commutative.
1339  case Intrinsic::vp_add:
1340  case Intrinsic::vp_mul:
1341  case Intrinsic::vp_and:
1342  case Intrinsic::vp_or:
1343  case Intrinsic::vp_xor:
1344  case Intrinsic::vp_fadd:
1345  case Intrinsic::vp_fmul:
1346  // These intrinsics have 'vr' versions.
1347  case Intrinsic::vp_sub:
1348  case Intrinsic::vp_fsub:
1349  case Intrinsic::vp_fdiv:
1350  return Operand == 0 || Operand == 1;
1351  default:
1352  return false;
1353  }
1354 }
1355 
1356 /// Check if sinking \p I's operands to I's basic block is profitable, because
1357 /// the operands can be folded into a target instruction, e.g.
1358 /// splats of scalars can fold into vector instructions.
1360  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1361  using namespace llvm::PatternMatch;
1362 
1363  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1364  return false;
1365 
1366  for (auto OpIdx : enumerate(I->operands())) {
1367  if (!canSplatOperand(I, OpIdx.index()))
1368  continue;
1369 
1370  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1371  // Make sure we are not already sinking this operand
1372  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1373  continue;
1374 
1375  // We are looking for a splat that can be sunk.
1377  m_Undef(), m_ZeroMask())))
1378  continue;
1379 
1380  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1381  // and vector registers
1382  for (Use &U : Op->uses()) {
1383  Instruction *Insn = cast<Instruction>(U.getUser());
1384  if (!canSplatOperand(Insn, U.getOperandNo()))
1385  return false;
1386  }
1387 
1388  Ops.push_back(&Op->getOperandUse(0));
1389  Ops.push_back(&OpIdx.value());
1390  }
1391  return true;
1392 }
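// For example (a sketch of the intended effect): with
//   %ins = insertelement <4 x i32> poison, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
//   %r = mul <4 x i32> %v, %splat
// defined in another block, the splat (and its insertelement) is sunk next to
// the multiply so instruction selection can use the vector-scalar form
// vmul.vx instead of keeping the splat live in a vector register.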
1393 
1395  unsigned Opc = VecOp.getOpcode();
1396 
1397  // Assume target opcodes can't be scalarized.
1398  // TODO - do we have any exceptions?
1399  if (Opc >= ISD::BUILTIN_OP_END)
1400  return false;
1401 
1402  // If the vector op is not supported, try to convert to scalar.
1403  EVT VecVT = VecOp.getValueType();
1404  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1405  return true;
1406 
1407  // If the vector op is supported, but the scalar op is not, the transform may
1408  // not be worthwhile.
1409  EVT ScalarVT = VecVT.getScalarType();
1410  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
1411 }
1412 
1414  const GlobalAddressSDNode *GA) const {
1415  // In order to maximise the opportunity for common subexpression elimination,
1416  // keep a separate ADD node for the global address offset instead of folding
1417  // it in the global address node. Later peephole optimisations may choose to
1418  // fold it back in when profitable.
1419  return false;
1420 }
1421 
1423  bool ForCodeSize) const {
1424  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1425  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1426  return false;
1427  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1428  return false;
1429  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1430  return false;
1431  return Imm.isZero();
1432 }
1433 
1434 // TODO: This is very conservative.
1436  unsigned Index) const {
1438  return false;
1439 
1440  // Only support extracting a fixed from a fixed vector for now.
1441  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
1442  return false;
1443 
1444  unsigned ResElts = ResVT.getVectorNumElements();
1445  unsigned SrcElts = SrcVT.getVectorNumElements();
1446 
 1447  // Conservatively only handle extracting half of a vector.
1448  // TODO: Relax this.
1449  if ((ResElts * 2) != SrcElts)
1450  return false;
1451 
1452  // The smallest type we can slide is i8.
1453  // TODO: We can extract index 0 from a mask vector without a slide.
1454  if (ResVT.getVectorElementType() == MVT::i1)
1455  return false;
1456 
1457  // Slide can support arbitrary index, but we only treat vslidedown.vi as
1458  // cheap.
1459  if (Index >= 32)
1460  return false;
1461 
1462  // TODO: We can do arbitrary slidedowns, but for now only support extracting
1463  // the upper half of a vector until we have more test coverage.
1464  return Index == 0 || Index == ResElts;
1465 }
1466 
1468  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1469  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1470  (VT == MVT::f64 && Subtarget.hasStdExtD());
1471 }
1472 
1475  EVT VT) const {
1476  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1477  // We might still end up using a GPR but that will be decided based on ABI.
1478  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1479  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1480  return MVT::f32;
1481 
1483 }
1484 
1487  EVT VT) const {
1488  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1489  // We might still end up using a GPR but that will be decided based on ABI.
1490  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1491  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1492  return 1;
1493 
1495 }
1496 
1497 // Changes the condition code and swaps operands if necessary, so the SetCC
1498 // operation matches one of the comparisons supported directly by branches
1499 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1500 // with 1/-1.
1502  ISD::CondCode &CC, SelectionDAG &DAG) {
1503  // If this is a single bit test that can't be handled by ANDI, shift the
1504  // bit to be tested to the MSB and perform a signed compare with 0.
1506  LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1507  isa<ConstantSDNode>(LHS.getOperand(1))) {
1508  uint64_t Mask = LHS.getConstantOperandVal(1);
1509  if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) {
1511  unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1512  LHS = LHS.getOperand(0);
1513  if (ShAmt != 0)
1514  LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1515  DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1516  return;
1517  }
1518  }
1519 
1520  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1521  int64_t C = RHSC->getSExtValue();
1522  switch (CC) {
1523  default: break;
1524  case ISD::SETGT:
1525  // Convert X > -1 to X >= 0.
1526  if (C == -1) {
1527  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1528  CC = ISD::SETGE;
1529  return;
1530  }
1531  break;
1532  case ISD::SETLT:
1533  // Convert X < 1 to 0 <= X.
1534  if (C == 1) {
1535  RHS = LHS;
1536  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1537  CC = ISD::SETGE;
1538  return;
1539  }
1540  break;
1541  }
1542  }
1543 
1544  switch (CC) {
1545  default:
1546  break;
1547  case ISD::SETGT:
1548  case ISD::SETLE:
1549  case ISD::SETUGT:
1550  case ISD::SETULE:
1552  std::swap(LHS, RHS);
1553  break;
1554  }
1555 }
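// For example, (setgt X, C) is rewritten as (setlt C, X) by swapping the
// operands, so every integer comparison maps onto one of the native branch
// comparisons (BEQ, BNE, BLT, BGE, BLTU, BGEU). Likewise, a single-bit test
// such as ((X & 0x800) != 0), whose mask ANDI cannot encode, is shifted so
// the tested bit becomes the sign bit and then compared against zero.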
1556 
1558  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1559  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1560  if (VT.getVectorElementType() == MVT::i1)
1561  KnownSize *= 8;
1562 
1563  switch (KnownSize) {
1564  default:
1565  llvm_unreachable("Invalid LMUL.");
1566  case 8:
1567  return RISCVII::VLMUL::LMUL_F8;
1568  case 16:
1569  return RISCVII::VLMUL::LMUL_F4;
1570  case 32:
1571  return RISCVII::VLMUL::LMUL_F2;
1572  case 64:
1573  return RISCVII::VLMUL::LMUL_1;
1574  case 128:
1575  return RISCVII::VLMUL::LMUL_2;
1576  case 256:
1577  return RISCVII::VLMUL::LMUL_4;
1578  case 512:
1579  return RISCVII::VLMUL::LMUL_8;
1580  }
1581 }
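// Example mappings (RVVBitsPerBlock is 64): nxv1i8 has a known minimum size
// of 8 bits and maps to LMUL_F8, nxv2i32 (64 bits) maps to LMUL_1, and
// nxv8i32 (256 bits) maps to LMUL_4. Mask types are scaled up first, so
// nxv8i1 is treated as 64 bits and also maps to LMUL_1.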
1582 
1584  switch (LMul) {
1585  default:
1586  llvm_unreachable("Invalid LMUL.");
1591  return RISCV::VRRegClassID;
1593  return RISCV::VRM2RegClassID;
1595  return RISCV::VRM4RegClassID;
1597  return RISCV::VRM8RegClassID;
1598  }
1599 }
1600 
1602  RISCVII::VLMUL LMUL = getLMUL(VT);
1603  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1607  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1608  "Unexpected subreg numbering");
1609  return RISCV::sub_vrm1_0 + Index;
1610  }
1611  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1612  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1613  "Unexpected subreg numbering");
1614  return RISCV::sub_vrm2_0 + Index;
1615  }
1616  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1617  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1618  "Unexpected subreg numbering");
1619  return RISCV::sub_vrm4_0 + Index;
1620  }
1621  llvm_unreachable("Invalid vector type.");
1622 }
1623 
1625  if (VT.getVectorElementType() == MVT::i1)
1626  return RISCV::VRRegClassID;
1627  return getRegClassIDForLMUL(getLMUL(VT));
1628 }
1629 
1630 // Attempt to decompose a subvector insert/extract between VecVT and
1631 // SubVecVT via subregister indices. Returns the subregister index that
1632 // can perform the subvector insert/extract with the given element index, as
1633 // well as the index corresponding to any leftover subvectors that must be
1634 // further inserted/extracted within the register class for SubVecVT.
1635 std::pair<unsigned, unsigned>
1637  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1638  const RISCVRegisterInfo *TRI) {
1639  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1640  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1641  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1642  "Register classes not ordered");
1643  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1644  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1645  // Try to compose a subregister index that takes us from the incoming
1646  // LMUL>1 register class down to the outgoing one. At each step we half
1647  // the LMUL:
1648  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1649  // Note that this is not guaranteed to find a subregister index, such as
1650  // when we are extracting from one VR type to another.
1651  unsigned SubRegIdx = RISCV::NoSubRegister;
1652  for (const unsigned RCID :
1653  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1654  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1655  VecVT = VecVT.getHalfNumVectorElementsVT();
1656  bool IsHi =
1657  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1658  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1659  getSubregIndexByMVT(VecVT, IsHi));
1660  if (IsHi)
1661  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1662  }
1663  return {SubRegIdx, InsertExtractIdx};
1664 }
1665 
1666 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1667 // stores for those types.
1668 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1669  return !Subtarget.useRVVForFixedLengthVectors() ||
1671 }
1672 
1674  if (ScalarTy->isPointerTy())
1675  return true;
1676 
1677  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1678  ScalarTy->isIntegerTy(32))
1679  return true;
1680 
1681  if (ScalarTy->isIntegerTy(64))
1682  return Subtarget.hasVInstructionsI64();
1683 
1684  if (ScalarTy->isHalfTy())
1685  return Subtarget.hasVInstructionsF16();
1686  if (ScalarTy->isFloatTy())
1687  return Subtarget.hasVInstructionsF32();
1688  if (ScalarTy->isDoubleTy())
1689  return Subtarget.hasVInstructionsF64();
1690 
1691  return false;
1692 }
1693 
1695  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1696  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1697  "Unexpected opcode");
1698  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1699  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1701  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1702  if (!II)
1703  return SDValue();
1704  return Op.getOperand(II->VLOperand + 1 + HasChain);
1705 }
1706 
1707 static bool useRVVForFixedLengthVectorVT(MVT VT,
1708  const RISCVSubtarget &Subtarget) {
1709  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1710  if (!Subtarget.useRVVForFixedLengthVectors())
1711  return false;
1712 
1713  // We only support a set of vector types with a consistent maximum fixed size
1714  // across all supported vector element types to avoid legalization issues.
1715  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1716  // fixed-length vector type we support is 1024 bytes.
1717  if (VT.getFixedSizeInBits() > 1024 * 8)
1718  return false;
1719 
1720  unsigned MinVLen = Subtarget.getRealMinVLen();
1721 
1722  MVT EltVT = VT.getVectorElementType();
1723 
1724  // Don't use RVV for vectors we cannot scalarize if required.
1725  switch (EltVT.SimpleTy) {
1726  // i1 is supported but has different rules.
1727  default:
1728  return false;
1729  case MVT::i1:
1730  // Masks can only use a single register.
1731  if (VT.getVectorNumElements() > MinVLen)
1732  return false;
1733  MinVLen /= 8;
1734  break;
1735  case MVT::i8:
1736  case MVT::i16:
1737  case MVT::i32:
1738  break;
1739  case MVT::i64:
1740  if (!Subtarget.hasVInstructionsI64())
1741  return false;
1742  break;
1743  case MVT::f16:
1744  if (!Subtarget.hasVInstructionsF16())
1745  return false;
1746  break;
1747  case MVT::f32:
1748  if (!Subtarget.hasVInstructionsF32())
1749  return false;
1750  break;
1751  case MVT::f64:
1752  if (!Subtarget.hasVInstructionsF64())
1753  return false;
1754  break;
1755  }
1756 
1757  // Reject elements larger than ELEN.
1758  if (EltVT.getSizeInBits() > Subtarget.getELEN())
1759  return false;
1760 
1761  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1762  // Don't use RVV for types that don't fit.
1763  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1764  return false;
1765 
1766  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1767  // the base fixed length RVV support in place.
1768  if (!VT.isPow2VectorType())
1769  return false;
1770 
1771  return true;
1772 }
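// As an illustration (assuming a minimum VLEN of 128): v4i32 occupies 128
// bits and is accepted with an effective LMUL of 1, v16i32 (512 bits) needs
// LMUL 4 and is accepted only if that does not exceed
// getMaxLMULForFixedLengthVectors(), and v3i32 is rejected because it is not
// a power-of-two vector type.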
1773 
1774 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1776 }
1777 
1778 // Return the largest legal scalable vector type that matches VT's element type.
1780  const RISCVSubtarget &Subtarget) {
1781  // This may be called before legal types are setup.
1782  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1783  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1784  "Expected legal fixed length vector!");
1785 
1786  unsigned MinVLen = Subtarget.getRealMinVLen();
1787  unsigned MaxELen = Subtarget.getELEN();
1788 
1789  MVT EltVT = VT.getVectorElementType();
1790  switch (EltVT.SimpleTy) {
1791  default:
1792  llvm_unreachable("unexpected element type for RVV container");
1793  case MVT::i1:
1794  case MVT::i8:
1795  case MVT::i16:
1796  case MVT::i32:
1797  case MVT::i64:
1798  case MVT::f16:
1799  case MVT::f32:
1800  case MVT::f64: {
1801  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1802  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1803  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
1804  unsigned NumElts =
1805  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1806  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1807  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1808  return MVT::getScalableVectorVT(EltVT, NumElts);
1809  }
1810  }
1811 }
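// For example, with a minimum VLEN of 128 (an assumption for illustration):
// v8i32 maps to the container nxv4i32, i.e. (8 * RVVBitsPerBlock) / 128
// elements, which is an LMUL=2 type, while v2i32 maps to the fractional-LMUL
// container nxv1i32 as long as ELEN permits it.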
1812 
1814  const RISCVSubtarget &Subtarget) {
1816  Subtarget);
1817 }
1818 
1821 }
1822 
1823 // Grow V to consume an entire RVV register.
1825  const RISCVSubtarget &Subtarget) {
1826  assert(VT.isScalableVector() &&
1827  "Expected to convert into a scalable vector!");
1829  "Expected a fixed length vector operand!");
1830  SDLoc DL(V);
1831  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1832  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1833 }
1834 
1835 // Shrink V so it's just big enough to maintain a VT's worth of data.
1837  const RISCVSubtarget &Subtarget) {
1838  assert(VT.isFixedLengthVector() &&
1839  "Expected to convert into a fixed length vector!");
1841  "Expected a scalable vector operand!");
1842  SDLoc DL(V);
1843  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1844  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1845 }
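// Together these two helpers act as cheap "casts": a fixed-length value such
// as v4i32 is placed at element 0 of an undef scalable container (e.g.
// nxv2i32, depending on VLEN) with INSERT_SUBVECTOR, operated on using
// VL-predicated nodes, and the result is pulled back out with
// EXTRACT_SUBVECTOR at index 0.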
1846 
 1847 /// Return the mask type suitable for masking the provided
1848 /// vector type. This is simply an i1 element type vector of the same
1849 /// (possibly scalable) length.
1850 static MVT getMaskTypeFor(MVT VecVT) {
1851  assert(VecVT.isVector());
1852  ElementCount EC = VecVT.getVectorElementCount();
1853  return MVT::getVectorVT(MVT::i1, EC);
1854 }
1855 
1856 /// Creates an all ones mask suitable for masking a vector of type VecTy with
 1857 /// vector length VL.
1859  SelectionDAG &DAG) {
1860  MVT MaskVT = getMaskTypeFor(VecVT);
1861  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1862 }
1863 
1865  const RISCVSubtarget &Subtarget) {
1866  return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
1867 }
1868 
1869 static std::pair<SDValue, SDValue>
1870 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1871  const RISCVSubtarget &Subtarget) {
1872  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1873  SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
1874  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1875  return {Mask, VL};
1876 }
1877 
1878 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1879 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1880 // the vector type that it is contained in.
1881 static std::pair<SDValue, SDValue>
1882 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1883  const RISCVSubtarget &Subtarget) {
1884  if (VecVT.isFixedLengthVector())
1885  return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
1886  Subtarget);
1887  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1888  MVT XLenVT = Subtarget.getXLenVT();
1889  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
1890  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1891  return {Mask, VL};
1892 }
1893 
1894 // As above but assuming the given type is a scalable vector type.
1895 static std::pair<SDValue, SDValue>
1897  const RISCVSubtarget &Subtarget) {
1898  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1899  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1900 }
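// Note that for scalable types the VL operand is the sentinel register X0;
// in the vsetvli encoding an x0 AVL (with a non-x0 destination) selects
// VLMAX, so the whole register group is operated on.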
1901 
1902 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1903 // of either is (currently) supported. This can get us into an infinite loop
1904 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1905 // as a ..., etc.
1906 // Until either (or both) of these can reliably lower any node, reporting that
1907 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1908 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1909 // which is not desirable.
1911  EVT VT, unsigned DefinedValues) const {
1912  return false;
1913 }
1914 
1916  const RISCVSubtarget &Subtarget) {
1917  // RISCV FP-to-int conversions saturate to the destination register size, but
1918  // don't produce 0 for nan. We can use a conversion instruction and fix the
1919  // nan case with a compare and a select.
1920  SDValue Src = Op.getOperand(0);
1921 
1922  MVT DstVT = Op.getSimpleValueType();
1923  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1924 
1925  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1926 
1927  if (!DstVT.isVector()) {
1928  unsigned Opc;
1929  if (SatVT == DstVT)
1930  Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1931  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1932  Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1933  else
1934  return SDValue();
1935  // FIXME: Support other SatVTs by clamping before or after the conversion.
1936 
1937  SDLoc DL(Op);
1938  SDValue FpToInt = DAG.getNode(
1939  Opc, DL, DstVT, Src,
1940  DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1941 
1942  if (Opc == RISCVISD::FCVT_WU_RV64)
1943  FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
1944 
1945  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1946  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
1948  }
1949 
1950  // Vectors.
1951 
1952  MVT DstEltVT = DstVT.getVectorElementType();
1953  MVT SrcVT = Src.getSimpleValueType();
1954  MVT SrcEltVT = SrcVT.getVectorElementType();
1955  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1956  unsigned DstEltSize = DstEltVT.getSizeInBits();
1957 
1958  // Only handle saturating to the destination type.
1959  if (SatVT != DstEltVT)
1960  return SDValue();
1961 
1962  // FIXME: Don't support narrowing by more than 1 steps for now.
1963  if (SrcEltSize > (2 * DstEltSize))
1964  return SDValue();
1965 
1966  MVT DstContainerVT = DstVT;
1967  MVT SrcContainerVT = SrcVT;
1968  if (DstVT.isFixedLengthVector()) {
1969  DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
1970  SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
1971  assert(DstContainerVT.getVectorElementCount() ==
1972  SrcContainerVT.getVectorElementCount() &&
1973  "Expected same element count");
1974  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1975  }
1976 
1977  SDLoc DL(Op);
1978 
1979  auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
1980 
1981  SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
1982  {Src, Src, DAG.getCondCode(ISD::SETNE),
1983  DAG.getUNDEF(Mask.getValueType()), Mask, VL});
1984 
1985  // Need to widen by more than 1 step, promote the FP type, then do a widening
1986  // convert.
1987  if (DstEltSize > (2 * SrcEltSize)) {
1988  assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
1989  MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
1990  Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
1991  }
1992 
1993  unsigned RVVOpc =
1995  SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
1996 
1997  SDValue SplatZero = DAG.getNode(
1998  RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
1999  DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2000  Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2001  Res, VL);
2002 
2003  if (DstVT.isFixedLengthVector())
2004  Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2005 
2006  return Res;
2007 }
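// Scalar example (a sketch): an f32 -> i32 fptosi.sat becomes a single
// fcvt.w.s with the RTZ rounding mode, followed by a compare of the source
// with itself and a select that forces the result to 0 when the comparison is
// unordered (i.e. the input was NaN). The vector path mirrors this with a
// VL-predicated conversion, a SETNE self-compare mask, and a merge with a
// zero splat.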
2008 
2010  switch (Opc) {
2011  case ISD::FROUNDEVEN:
2012  case ISD::VP_FROUNDEVEN:
2013  return RISCVFPRndMode::RNE;
2014  case ISD::FTRUNC:
2015  case ISD::VP_FROUNDTOZERO:
2016  return RISCVFPRndMode::RTZ;
2017  case ISD::FFLOOR:
2018  case ISD::VP_FFLOOR:
2019  return RISCVFPRndMode::RDN;
2020  case ISD::FCEIL:
2021  case ISD::VP_FCEIL:
2022  return RISCVFPRndMode::RUP;
2023  case ISD::FROUND:
2024  case ISD::VP_FROUND:
2025  return RISCVFPRndMode::RMM;
2026  case ISD::FRINT:
2027  return RISCVFPRndMode::DYN;
2028  }
2029 
2030  return RISCVFPRndMode::Invalid;
2031 }
2032 
2033 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2034 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2035 // the integer domain and back. Taking care to avoid converting values that are
2036 // nan or already correct.
2037 static SDValue
2039  const RISCVSubtarget &Subtarget) {
2040  MVT VT = Op.getSimpleValueType();
2041  assert(VT.isVector() && "Unexpected type");
2042 
2043  SDLoc DL(Op);
2044 
2045  SDValue Src = Op.getOperand(0);
2046 
2047  MVT ContainerVT = VT;
2048  if (VT.isFixedLengthVector()) {
2049  ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2050  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2051  }
2052 
2053  SDValue Mask, VL;
2054  if (Op->isVPOpcode()) {
2055  Mask = Op.getOperand(1);
2056  VL = Op.getOperand(2);
2057  } else {
2058  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2059  }
2060 
2061  // Freeze the source since we are increasing the number of uses.
2062  Src = DAG.getFreeze(Src);
2063 
2064  // We do the conversion on the absolute value and fix the sign at the end.
2065  SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2066 
2067  // Determine the largest integer that can be represented exactly. This and
2068  // values larger than it don't have any fractional bits so don't need to
2069  // be converted.
2070  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2071  unsigned Precision = APFloat::semanticsPrecision(FltSem);
2072  APFloat MaxVal = APFloat(FltSem);
2073  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2074  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2075  SDValue MaxValNode =
2076  DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2077  SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2078  DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2079 
2080  // If abs(Src) was larger than MaxVal or nan, keep it.
2081  MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2082  Mask =
2083  DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2084  {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2085  Mask, Mask, VL});
2086 
2087  // Truncate to integer and convert back to FP.
2088  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2089  MVT XLenVT = Subtarget.getXLenVT();
2090  SDValue Truncated;
2091 
2092  switch (Op.getOpcode()) {
2093  default:
2094  llvm_unreachable("Unexpected opcode");
2095  case ISD::FCEIL:
2096  case ISD::VP_FCEIL:
2097  case ISD::FFLOOR:
2098  case ISD::VP_FFLOOR:
2099  case ISD::FROUND:
2100  case ISD::FROUNDEVEN:
2101  case ISD::VP_FROUND:
2102  case ISD::VP_FROUNDEVEN:
2103  case ISD::VP_FROUNDTOZERO: {
2104  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2105  assert(FRM != RISCVFPRndMode::Invalid);
2106  Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2107  DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2108  break;
2109  }
2110  case ISD::FTRUNC:
2111  Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2112  Mask, VL);
2113  break;
2114  case ISD::VP_FRINT:
2115  Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2116  break;
2117  case ISD::VP_FNEARBYINT:
2118  Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2119  Mask, VL);
2120  break;
2121  }
2122 
2123  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2124  if (Op.getOpcode() != ISD::VP_FNEARBYINT)
2125  Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2126  Mask, VL);
2127 
2128  // Restore the original sign so that -0.0 is preserved.
2129  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2130  Src, Src, Mask, VL);
2131 
2132  if (!VT.isFixedLengthVector())
2133  return Truncated;
2134 
2135  return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2136 }
2137 
2138 static SDValue
2139 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2140  const RISCVSubtarget &Subtarget) {
2141  MVT VT = Op.getSimpleValueType();
2142  if (VT.isVector())
2143  return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2144 
2145  if (DAG.shouldOptForSize())
2146  return SDValue();
2147 
2148  SDLoc DL(Op);
2149  SDValue Src = Op.getOperand(0);
2150 
2151  // Create an integer the size of the mantissa with the MSB set. This and all
2152  // values larger than it don't have any fractional bits so don't need to be
2153  // converted.
2154  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2155  unsigned Precision = APFloat::semanticsPrecision(FltSem);
2156  APFloat MaxVal = APFloat(FltSem);
2157  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2158  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2159  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2160 
2161  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2162  return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2163  DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2164 }
2165 
2166 struct VIDSequence {
2167  int64_t StepNumerator;
2168  unsigned StepDenominator;
2169  int64_t Addend;
2170 };
2171 
2172 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2173  uint32_t BitWidth) {
2174  APSInt ValInt(BitWidth, !APF.isNegative());
2175  // We use an arbitrary rounding mode here. If a floating-point value is an exact
2176  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
2177  // the rounding mode changes the output value, then it is not an exact
2178  // integer.
2179  RoundingMode ArbitraryRM = RoundingMode::TowardZero;
2180  bool IsExact;
2181  // If it is out of signed integer range, it will return an invalid operation.
2182  // If it is not an exact integer, IsExact is false.
2183  if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2184  APFloatBase::opInvalidOp) ||
2185  !IsExact)
2186  return None;
2187  return ValInt.extractBitsAsZExtValue(BitWidth, 0);
2188 }
2189 
2190 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2191 // to the (non-zero) step S and start value X. This can be then lowered as the
2192 // RVV sequence (VID * S) + X, for example.
2193 // The step S is represented as an integer numerator divided by a positive
2194 // denominator. Note that the implementation currently only identifies
2195 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
2196 // cannot detect 2/3, for example.
2197 // Note that this method will also match potentially unappealing index
2198 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
2199 // determine whether this is worth generating code for.
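// For example, <i32 1, i32 3, i32 5, i32 7> is matched with StepNumerator=2,
// StepDenominator=1 and Addend=1 (element i is (2*i)/1 + 1), while
// <i32 0, i32 0, i32 1, i32 1> is matched with StepNumerator=1,
// StepDenominator=2 and Addend=0 (element i is i/2).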
2200 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2201  unsigned NumElts = Op.getNumOperands();
2202  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2203  bool IsInteger = Op.getValueType().isInteger();
2204 
2205  std::optional<unsigned> SeqStepDenom;
2206  std::optional<int64_t> SeqStepNum, SeqAddend;
2207  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2208  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2209  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2210  // Assume undef elements match the sequence; we just have to be careful
2211  // when interpolating across them.
2212  if (Op.getOperand(Idx).isUndef())
2213  continue;
2214 
2215  uint64_t Val;
2216  if (IsInteger) {
2217  // The BUILD_VECTOR must be all constants.
2218  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2219  return None;
2220  Val = Op.getConstantOperandVal(Idx) &
2221  maskTrailingOnes<uint64_t>(EltSizeInBits);
2222  } else {
2223  // The BUILD_VECTOR must be all constants.
2224  if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2225  return None;
2226  if (auto ExactInteger = getExactInteger(
2227  cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2228  EltSizeInBits))
2229  Val = *ExactInteger;
2230  else
2231  return None;
2232  }
2233 
2234  if (PrevElt) {
2235  // Calculate the step since the last non-undef element, and ensure
2236  // it's consistent across the entire sequence.
2237  unsigned IdxDiff = Idx - PrevElt->second;
2238  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2239 
2240  // A zero value difference means that we're somewhere in the middle
2241  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2242  // step change before evaluating the sequence.
2243  if (ValDiff == 0)
2244  continue;
2245 
2246  int64_t Remainder = ValDiff % IdxDiff;
2247  // Normalize the step if it's greater than 1.
2248  if (Remainder != ValDiff) {
2249  // The difference must cleanly divide the element span.
2250  if (Remainder != 0)
2251  return None;
2252  ValDiff /= IdxDiff;
2253  IdxDiff = 1;
2254  }
2255 
2256  if (!SeqStepNum)
2257  SeqStepNum = ValDiff;
2258  else if (ValDiff != SeqStepNum)
2259  return None;
2260 
2261  if (!SeqStepDenom)
2262  SeqStepDenom = IdxDiff;
2263  else if (IdxDiff != *SeqStepDenom)
2264  return None;
2265  }
2266 
2267  // Record this non-undef element for later.
2268  if (!PrevElt || PrevElt->first != Val)
2269  PrevElt = std::make_pair(Val, Idx);
2270  }
2271 
2272  // We need to have logged a step for this to count as a legal index sequence.
2273  if (!SeqStepNum || !SeqStepDenom)
2274  return None;
2275 
2276  // Loop back through the sequence and validate elements we might have skipped
2277  // while waiting for a valid step. While doing this, log any sequence addend.
2278  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2279  if (Op.getOperand(Idx).isUndef())
2280  continue;
2281  uint64_t Val;
2282  if (IsInteger) {
2283  Val = Op.getConstantOperandVal(Idx) &
2284  maskTrailingOnes<uint64_t>(EltSizeInBits);
2285  } else {
2286  Val = *getExactInteger(
2287  cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2288  EltSizeInBits);
2289  }
2290  uint64_t ExpectedVal =
2291  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2292  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2293  if (!SeqAddend)
2294  SeqAddend = Addend;
2295  else if (Addend != SeqAddend)
2296  return None;
2297  }
2298 
2299  assert(SeqAddend && "Must have an addend if we have a step");
2300 
2301  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2302 }
2303 
2304 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2305 // and lower it as a VRGATHER_VX_VL from the source vector.
2306 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2307  SelectionDAG &DAG,
2308  const RISCVSubtarget &Subtarget) {
2309  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2310  return SDValue();
2311  SDValue Vec = SplatVal.getOperand(0);
2312  // Only perform this optimization on vectors of the same size for simplicity.
2313  // Don't perform this optimization for i1 vectors.
2314  // FIXME: Support i1 vectors, maybe by promoting to i8?
2315  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2316  return SDValue();
2317  SDValue Idx = SplatVal.getOperand(1);
2318  // The index must be a legal type.
2319  if (Idx.getValueType() != Subtarget.getXLenVT())
2320  return SDValue();
2321 
2322  MVT ContainerVT = VT;
2323  if (VT.isFixedLengthVector()) {
2324  ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2325  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2326  }
2327 
2328  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2329 
2330  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2331  Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2332 
2333  if (!VT.isFixedLengthVector())
2334  return Gather;
2335 
2336  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2337 }
2338 
2339 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2340  const RISCVSubtarget &Subtarget) {
2341  MVT VT = Op.getSimpleValueType();
2342  assert(VT.isFixedLengthVector() && "Unexpected vector!");
2343 
2344  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2345 
2346  SDLoc DL(Op);
2347  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2348 
2349  MVT XLenVT = Subtarget.getXLenVT();
2350  unsigned NumElts = Op.getNumOperands();
2351 
2352  if (VT.getVectorElementType() == MVT::i1) {
2353  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2354  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2355  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2356  }
2357 
2358  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2359  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2360  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
2361  }
2362 
2363  // Lower constant mask BUILD_VECTORs via an integer vector type, in
2364  // scalar integer chunks whose bit-width depends on the number of mask
2365  // bits and XLEN.
2366  // First, determine the most appropriate scalar integer type to use. This
2367  // is at most XLenVT, but may be shrunk to a smaller vector element type
2368  // according to the size of the final vector - use i8 chunks rather than
2369  // XLenVT if we're producing a v8i1. This results in more consistent
2370  // codegen across RV32 and RV64.
2371  unsigned NumViaIntegerBits =
2372  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
2373  NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
2374  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2375  // If we have to use more than one INSERT_VECTOR_ELT then this
2376  // optimization is likely to increase code size; avoid performing it in
2377  // such a case. We can use a load from a constant pool in this case.
2378  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2379  return SDValue();
2380  // Now we can create our integer vector type. Note that it may be larger
2381  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2382  MVT IntegerViaVecVT =
2383  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2384  divideCeil(NumElts, NumViaIntegerBits));
2385 
2386  uint64_t Bits = 0;
2387  unsigned BitPos = 0, IntegerEltIdx = 0;
2388  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2389 
2390  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2391  // Once we accumulate enough bits to fill our scalar type, insert into
2392  // our vector and clear our accumulated data.
2393  if (I != 0 && I % NumViaIntegerBits == 0) {
2394  if (NumViaIntegerBits <= 32)
2395  Bits = SignExtend64<32>(Bits);
2396  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2397  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2398  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2399  Bits = 0;
2400  BitPos = 0;
2401  IntegerEltIdx++;
2402  }
2403  SDValue V = Op.getOperand(I);
2404  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2405  Bits |= ((uint64_t)BitValue << BitPos);
2406  }
2407 
2408  // Insert the (remaining) scalar value into position in our integer
2409  // vector type.
2410  if (NumViaIntegerBits <= 32)
2411  Bits = SignExtend64<32>(Bits);
2412  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2413  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2414  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2415 
2416  if (NumElts < NumViaIntegerBits) {
2417  // If we're producing a smaller vector than our minimum legal integer
2418  // type, bitcast to the equivalent (known-legal) mask type, and extract
2419  // our final mask.
2420  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2421  Vec = DAG.getBitcast(MVT::v8i1, Vec);
2422  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2423  DAG.getConstant(0, DL, XLenVT));
2424  } else {
2425  // Else we must have produced an integer type with the same size as the
2426  // mask type; bitcast for the final result.
2427  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2428  Vec = DAG.getBitcast(VT, Vec);
2429  }
2430 
2431  return Vec;
2432  }
2433 
2434  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2435  // vector type, we have a legal equivalently-sized i8 type, so we can use
2436  // that.
2437  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2438  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2439 
2440  SDValue WideVec;
2441  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2442  // For a splat, perform a scalar truncate before creating the wider
2443  // vector.
2444  assert(Splat.getValueType() == XLenVT &&
2445  "Unexpected type for i1 splat value");
2446  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2447  DAG.getConstant(1, DL, XLenVT));
2448  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2449  } else {
2450  SmallVector<SDValue, 8> Ops(Op->op_values());
2451  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2452  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2453  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2454  }
2455 
2456  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2457  }
2458 
2459  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2460  if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2461  return Gather;
2462  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2463  : RISCVISD::VMV_V_X_VL;
2464  Splat =
2465  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2466  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2467  }
2468 
2469  // Try and match index sequences, which we can lower to the vid instruction
2470  // with optional modifications. An all-undef vector is matched by
2471  // getSplatValue, above.
2472  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2473  int64_t StepNumerator = SimpleVID->StepNumerator;
2474  unsigned StepDenominator = SimpleVID->StepDenominator;
2475  int64_t Addend = SimpleVID->Addend;
2476 
2477  assert(StepNumerator != 0 && "Invalid step");
2478  bool Negate = false;
2479  int64_t SplatStepVal = StepNumerator;
2480  unsigned StepOpcode = ISD::MUL;
2481  if (StepNumerator != 1) {
2482  if (isPowerOf2_64(std::abs(StepNumerator))) {
2483  Negate = StepNumerator < 0;
2484  StepOpcode = ISD::SHL;
2485  SplatStepVal = Log2_64(std::abs(StepNumerator));
2486  }
2487  }
2488 
2489  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2490  // threshold since it's the immediate value many RVV instructions accept.
2491  // There is no vmul.vi instruction, so ensure the multiply constant can fit
2492  // in a single addi instruction.
2493  if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2494  (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2495  isPowerOf2_32(StepDenominator) &&
2496  (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2497  MVT VIDVT =
2498  VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
2499  MVT VIDContainerVT =
2500  getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
2501  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
2502  // Convert right out of the scalable type so we can use standard ISD
2503  // nodes for the rest of the computation. If we used scalable types with
2504  // these, we'd lose the fixed-length vector info and generate worse
2505  // vsetvli code.
2506  VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
2507  if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2508  (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2509  SDValue SplatStep = DAG.getSplatBuildVector(
2510  VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2511  VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
2512  }
2513  if (StepDenominator != 1) {
2514  SDValue SplatStep = DAG.getSplatBuildVector(
2515  VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2516  VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
2517  }
2518  if (Addend != 0 || Negate) {
2519  SDValue SplatAddend = DAG.getSplatBuildVector(
2520  VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
2521  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
2522  VID);
2523  }
2524  if (VT.isFloatingPoint()) {
2525  // TODO: Use vfwcvt to reduce register pressure.
2526  VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
2527  }
2528  return VID;
2529  }
2530  }
2531 
2532  // Attempt to detect "hidden" splats, which only reveal themselves as splats
2533  // when re-interpreted as a vector with a larger element type. For example,
2534  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2535  // could be instead splat as
2536  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2537  // TODO: This optimization could also work on non-constant splats, but it
2538  // would require bit-manipulation instructions to construct the splat value.
2539  SmallVector<SDValue> Sequence;
2540  unsigned EltBitSize = VT.getScalarSizeInBits();
2541  const auto *BV = cast<BuildVectorSDNode>(Op);
2542  if (VT.isInteger() && EltBitSize < 64 &&
2543  ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2544  BV->getRepeatedSequence(Sequence) &&
2545  (Sequence.size() * EltBitSize) <= 64) {
2546  unsigned SeqLen = Sequence.size();
2547  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2548  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2549  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2550  ViaIntVT == MVT::i64) &&
2551  "Unexpected sequence type");
2552 
2553  unsigned EltIdx = 0;
2554  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2555  uint64_t SplatValue = 0;
2556  // Construct the amalgamated value which can be splatted as this larger
2557  // vector type.
2558  for (const auto &SeqV : Sequence) {
2559  if (!SeqV.isUndef())
2560  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2561  << (EltIdx * EltBitSize));
2562  EltIdx++;
2563  }
2564 
2565  // On RV64, sign-extend from 32 to 64 bits where possible in order to
2566  // achieve better constant materialization.
2567  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2568  SplatValue = SignExtend64<32>(SplatValue);
2569 
2570  // Since we can't introduce illegal i64 types at this stage, we can only
2571  // perform an i64 splat on RV32 if it is its own sign-extended value. That
2572  // way we can use RVV instructions to splat.
2573  assert((ViaIntVT.bitsLE(XLenVT) ||
2574  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2575  "Unexpected bitcast sequence");
2576  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2577  SDValue ViaVL =
2578  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2579  MVT ViaContainerVT =
2580  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2581  SDValue Splat =
2582  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2583  DAG.getUNDEF(ViaContainerVT),
2584  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2585  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2586  return DAG.getBitcast(VT, Splat);
2587  }
2588  }
2589 
2590  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2591  // which constitute a large proportion of the elements. In such cases we can
2592  // splat a vector with the dominant element and make up the shortfall with
2593  // INSERT_VECTOR_ELTs.
2594  // Note that this includes vectors of 2 elements by association. The
2595  // upper-most element is the "dominant" one, allowing us to use a splat to
2596  // "insert" the upper element, and an insert of the lower element at position
2597  // 0, which improves codegen.
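// For example, <4 x i32> <C, C, X, C> (with C the most common value) is
// lowered as a splat of C followed by a single INSERT_VECTOR_ELT of X at
// index 2, rather than four separate scalar inserts.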
2598  SDValue DominantValue;
2599  unsigned MostCommonCount = 0;
2600  DenseMap<SDValue, unsigned> ValueCounts;
2601  unsigned NumUndefElts =
2602  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2603 
2604  // Track the number of scalar loads we know we'd be inserting, estimated as
2605  // any non-zero floating-point constant. Other kinds of element are either
2606  // already in registers or are materialized on demand. The threshold at which
2607  // a vector load is more desirable than several scalar materialization and
2608  // vector-insertion instructions is not known.
2609  unsigned NumScalarLoads = 0;
2610 
2611  for (SDValue V : Op->op_values()) {
2612  if (V.isUndef())
2613  continue;
2614 
2615  ValueCounts.insert(std::make_pair(V, 0));
2616  unsigned &Count = ValueCounts[V];
2617 
2618  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2619  NumScalarLoads += !CFP->isExactlyValue(+0.0);
2620 
2621  // Is this value dominant? In case of a tie, prefer the highest element as
2622  // it's cheaper to insert near the beginning of a vector than it is at the
2623  // end.
2624  if (++Count >= MostCommonCount) {
2625  DominantValue = V;
2626  MostCommonCount = Count;
2627  }
2628  }
2629 
2630  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2631  unsigned NumDefElts = NumElts - NumUndefElts;
2632  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2633 
2634  // Don't perform this optimization when optimizing for size, since
2635  // materializing elements and inserting them tends to cause code bloat.
2636  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2637  ((MostCommonCount > DominantValueCountThreshold) ||
2638  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2639  // Start by splatting the most common element.
2640  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2641 
2642  DenseSet<SDValue> Processed{DominantValue};
2643  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2644  for (const auto &OpIdx : enumerate(Op->ops())) {
2645  const SDValue &V = OpIdx.value();
2646  if (V.isUndef() || !Processed.insert(V).second)
2647  continue;
2648  if (ValueCounts[V] == 1) {
2649  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2650  DAG.getConstant(OpIdx.index(), DL, XLenVT));
2651  } else {
2652  // Blend in all instances of this value using a VSELECT, using a
2653  // mask where each bit signals whether that element is the one
2654  // we're after.
2655  SmallVector<SDValue> Ops;
2656  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2657  return DAG.getConstant(V == V1, DL, XLenVT);
2658  });
2659  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2660  DAG.getBuildVector(SelMaskTy, DL, Ops),
2661  DAG.getSplatBuildVector(VT, DL, V), Vec);
2662  }
2663  }
2664 
2665  return Vec;
2666  }
2667 
2668  return SDValue();
2669 }
2670 
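// Splat the pair (Lo, Hi) of i32 halves of an i64 value on RV32. For example,
// for the constant 0xFFFFFFFF80000000, Lo = 0x80000000 and Hi = 0xFFFFFFFF;
// since Hi equals the replicated sign bit of Lo, a single SEW=64 vmv.v.x of Lo
// reproduces the full 64-bit value through sign extension.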
2671 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2672  SDValue Lo, SDValue Hi, SDValue VL,
2673  SelectionDAG &DAG) {
2674  if (!Passthru)
2675  Passthru = DAG.getUNDEF(VT);
2676  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2677  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2678  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2679  // If Hi constant is all the same sign bit as Lo, lower this as a custom
2680  // node in order to try and match RVV vector/scalar instructions.
2681  if ((LoC >> 31) == HiC)
2682  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2683 
2684  // If VL is the all-ones VLMAX sentinel and the Hi constant is equal to Lo,
2685  // we can use a vmv.v.x whose EEW = 32 to lower it.
2686  auto *Const = dyn_cast<ConstantSDNode>(VL);
2687  if (LoC == HiC && Const && Const->isAllOnesValue()) {
2688  MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2689  // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2690  // access the subtarget here now.
2691  auto InterVec = DAG.getNode(
2692  RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2693  DAG.getRegister(RISCV::X0, MVT::i32));
2694  return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2695  }
2696  }
2697 
2698  // Fall back to a stack store and stride x0 vector load.
2699  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2700  Hi, VL);
2701 }
2702 
2703 // Called by type legalization to handle splat of i64 on RV32.
2704 // FIXME: We can optimize this when the type has sign or zero bits in one
2705 // of the halves.
2706 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2707  SDValue Scalar, SDValue VL,
2708  SelectionDAG &DAG) {
2709  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2710  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2711  DAG.getConstant(0, DL, MVT::i32));
2712  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2713  DAG.getConstant(1, DL, MVT::i32));
2714  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2715 }
2716 
2717 // This function lowers a splat of a scalar operand Splat with the vector
2718 // length VL. It ensures the final sequence is type legal, which is useful when
2719 // lowering a splat after type legalization.
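// For example, a constant that fits simm5 is splatted with VMV_V_X_VL so isel
// can form a .vi instruction; when VL is 1 and the value wouldn't benefit from
// an immediate, VMV_S_X_VL is used instead; and an i64 scalar on RV32 falls
// back to splatSplitI64WithVL above.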
2720 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2721  MVT VT, SDLoc DL, SelectionDAG &DAG,
2722  const RISCVSubtarget &Subtarget) {
2723  bool HasPassthru = Passthru && !Passthru.isUndef();
2724  if (!HasPassthru && !Passthru)
2725  Passthru = DAG.getUNDEF(VT);
2726  if (VT.isFloatingPoint()) {
2727  // If VL is 1, we could use vfmv.s.f.
2728  if (isOneConstant(VL))
2729  return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2730  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2731  }
2732 
2733  MVT XLenVT = Subtarget.getXLenVT();
2734 
2735  // Simplest case is that the operand needs to be promoted to XLenVT.
2736  if (Scalar.getValueType().bitsLE(XLenVT)) {
2737  // If the operand is a constant, sign extend to increase our chances
2738  // of being able to use a .vi instruction. ANY_EXTEND would become a
2739  // zero extend and the simm5 check in isel would fail.
2740  // FIXME: Should we ignore the upper bits in isel instead?
2741  unsigned ExtOpc =
2742  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2743  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2744  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2745  // If VL is 1 and the scalar value won't benefit from immediate, we could
2746  // use vmv.s.x.
2747  if (isOneConstant(VL) &&
2748  (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2749  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2750  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2751  }
2752 
2753  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2754  "Unexpected scalar for splat lowering!");
2755 
2756  if (isOneConstant(VL) && isNullConstant(Scalar))
2757  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2758  DAG.getConstant(0, DL, XLenVT), VL);
2759 
2760  // Otherwise use the more complicated splatting algorithm.
2761  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2762 }
2763 
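// Returns true if Mask interleaves the low halves of two source vectors. For
// example, with two v8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11> qualifies:
// result element i takes element i/2 from a single consistent source per
// even/odd polarity. SwapSources is set when the even result elements come
// from the second source operand.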
2764 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2765  const RISCVSubtarget &Subtarget) {
2766  // We need to be able to widen elements to the next larger integer type.
2767  if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2768  return false;
2769 
2770  int Size = Mask.size();
2771  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2772 
2773  int Srcs[] = {-1, -1};
2774  for (int i = 0; i != Size; ++i) {
2775  // Ignore undef elements.
2776  if (Mask[i] < 0)
2777  continue;
2778 
2779  // Is this an even or odd element?
2780  int Pol = i % 2;
2781 
2782  // Ensure we consistently use the same source for this element polarity.
2783  int Src = Mask[i] / Size;
2784  if (Srcs[Pol] < 0)
2785  Srcs[Pol] = Src;
2786  if (Srcs[Pol] != Src)
2787  return false;
2788 
2789  // Make sure the element within the source is appropriate for this element
2790  // in the destination.
2791  int Elt = Mask[i] % Size;
2792  if (Elt != i / 2)
2793  return false;
2794  }
2795 
2796  // We need to find a source for each polarity and they can't be the same.
2797  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2798  return false;
2799 
2800  // Swap the sources if the second source was in the even polarity.
2801  SwapSources = Srcs[0] > Srcs[1];
2802 
2803  return true;
2804 }
2805 
2806 /// Match shuffles that concatenate two vectors, rotate the concatenation,
2807 /// and then extract the original number of elements from the rotated result.
2808 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
2809 /// returned rotation amount is for a rotate right, where elements move from
2810 /// higher elements to lower elements. \p LoSrc indicates the first source
2811 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
2812 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
2813 /// 0 or 1 if a rotation is found.
2814 ///
2815 /// NOTE: We talk about rotate to the right which matches how bit shift and
2816 /// rotate instructions are described where LSBs are on the right, but LLVM IR
2817 /// and the table below write vectors with the lowest elements on the left.
2818 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
2819  int Size = Mask.size();
2820 
2821  // We need to detect various ways of spelling a rotation:
2822  // [11, 12, 13, 14, 15, 0, 1, 2]
2823  // [-1, 12, 13, 14, -1, -1, 1, -1]
2824  // [-1, -1, -1, -1, -1, -1, 1, 2]
2825  // [ 3, 4, 5, 6, 7, 8, 9, 10]
2826  // [-1, 4, 5, 6, -1, -1, 9, -1]
2827  // [-1, 4, 5, 6, -1, -1, -1, -1]
2828  int Rotation = 0;
2829  LoSrc = -1;
2830  HiSrc = -1;
2831  for (int i = 0; i != Size; ++i) {
2832  int M = Mask[i];
2833  if (M < 0)
2834  continue;
2835 
2836  // Determine where a rotate vector would have started.
2837  int StartIdx = i - (M % Size);
2838  // The identity rotation isn't interesting, stop.
2839  if (StartIdx == 0)
2840  return -1;
2841 
2842  // If we found the tail of a vector the rotation must be the missing
2843  // front. If we found the head of a vector, it must be how much of the
2844  // head.
2845  int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
2846 
2847  if (Rotation == 0)
2848  Rotation = CandidateRotation;
2849  else if (Rotation != CandidateRotation)
2850  // The rotations don't match, so we can't match this mask.
2851  return -1;
2852 
2853  // Compute which value this mask is pointing at.
2854  int MaskSrc = M < Size ? 0 : 1;
2855 
2856  // Compute which of the two target values this index should be assigned to.
2857  // This reflects whether the high elements are remaining or the low elements
2858  // are remaining.
2859  int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2860 
2861  // Either set up this value if we've not encountered it before, or check
2862  // that it remains consistent.
2863  if (TargetSrc < 0)
2864  TargetSrc = MaskSrc;
2865  else if (TargetSrc != MaskSrc)
2866  // This may be a rotation, but it pulls from the inputs in some
2867  // unsupported interleaving.
2868  return -1;
2869  }
2870 
2871  // Check that we successfully analyzed the mask, and normalize the results.
2872  assert(Rotation != 0 && "Failed to locate a viable rotation!");
2873  assert((LoSrc >= 0 || HiSrc >= 0) &&
2874  "Failed to find a rotated input vector!");
2875 
2876  return Rotation;
2877 }
2878 
2879 // Lower the following shuffles to vnsrl.
2880 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
2881 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
2882 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
2883 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
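// Viewing the concatenated source as a vector with elements twice as wide, the
// even narrow elements sit in the low half of each wide element and the odd
// narrow elements in the high half, so a narrowing shift right (vnsrl) by 0 or
// by the narrow element width extracts shuffle a) or b) respectively.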
2884 static SDValue lowerVECTOR_SHUFFLEAsVNSRL(const SDLoc &DL, MVT VT,
2885  MVT ContainerVT, SDValue V1,
2886  SDValue V2, SDValue TrueMask,
2887  SDValue VL, ArrayRef<int> Mask,
2888  const RISCVSubtarget &Subtarget,
2889  SelectionDAG &DAG) {
2890  // Need to be able to widen the vector.
2891  if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2892  return SDValue();
2893 
2894  // Both inputs must be extracts.
2895  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
2896  V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2897  return SDValue();
2898 
2899  // Extracting from the same source.
2900  SDValue Src = V1.getOperand(0);
2901  if (Src != V2.getOperand(0))
2902  return SDValue();
2903 
2904  // Src needs to have twice the number of elements.
2905  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
2906  return SDValue();
2907 
2908  // The extracts must extract the two halves of the source.
2909  if (V1.getConstantOperandVal(1) != 0 ||
2910  V2.getConstantOperandVal(1) != Mask.size())
2911  return SDValue();
2912 
2913  // First index must be the first even or odd element from V1.
2914  if (Mask[0] != 0 && Mask[0] != 1)
2915  return SDValue();
2916 
2917  // The others must increase by 2 each time.
2918  // TODO: Support undef elements?
2919  for (unsigned i = 1; i != Mask.size(); ++i)
2920  if (Mask[i] != Mask[i - 1] + 2)
2921  return SDValue();
2922 
2923  // Convert the source using a container type with twice the elements. Since
2924  // source VT is legal and twice this VT, we know VT isn't LMUL=8 so it is
2925  // safe to double.
2926  MVT DoubleContainerVT =
2927  MVT::getVectorVT(ContainerVT.getVectorElementType(),
2928  ContainerVT.getVectorElementCount() * 2);
2929  Src = convertToScalableVector(DoubleContainerVT, Src, DAG, Subtarget);
2930 
2931  // Convert the vector to a wider integer type with the original element
2932  // count. This also converts FP to int.
2933  unsigned EltBits = ContainerVT.getScalarSizeInBits();
2934  MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2935  MVT WideIntContainerVT =
2936  MVT::getVectorVT(WideIntEltVT, ContainerVT.getVectorElementCount());
2937  Src = DAG.getBitcast(WideIntContainerVT, Src);
2938 
2939  // Convert to the integer version of the container type.
2940  MVT IntEltVT = MVT::getIntegerVT(EltBits);
2941  MVT IntContainerVT =
2942  MVT::getVectorVT(IntEltVT, ContainerVT.getVectorElementCount());
2943 
2944  // If we want even elements, then the shift amount is 0. Otherwise, shift by
2945  // the original element size.
2946  unsigned Shift = Mask[0] == 0 ? 0 : EltBits;
2947  SDValue SplatShift = DAG.getNode(
2948  RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
2949  DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
2950  SDValue Res =
2951  DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
2952  DAG.getUNDEF(IntContainerVT), TrueMask, VL);
2953  // Cast back to FP if needed.
2954  Res = DAG.getBitcast(ContainerVT, Res);
2955 
2956  return convertFromScalableVector(VT, Res, DAG, Subtarget);
2957 }
2958 
2959 // Lower the following shuffle to vslidedown.
2960 // a)
2961 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
2962 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
2963 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
2964 // b)
2965 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
2966 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
2967 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
2968 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
2969 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
2970 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
2971 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
2972  SDValue V1, SDValue V2,
2973  ArrayRef<int> Mask,
2974  const RISCVSubtarget &Subtarget,
2975  SelectionDAG &DAG) {
2976  auto findNonEXTRACT_SUBVECTORParent =
2977  [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
2978  uint64_t Offset = 0;
2979  while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
2980  // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
2981  // a scalable vector. But we don't want to match the case.
2982  Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
2983  Offset += Parent.getConstantOperandVal(1);
2984  Parent = Parent.getOperand(0);
2985  }
2986  return std::make_pair(Parent, Offset);
2987  };
2988 
2989  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
2990  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
2991 
2992  // Extracting from the same source.
2993  SDValue Src = V1Src;
2994  if (Src != V2Src)
2995  return SDValue();
2996 
2997  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
2998  SmallVector<int, 16> NewMask(Mask);
2999  for (size_t i = 0; i != NewMask.size(); ++i) {
3000  if (NewMask[i] == -1)
3001  continue;
3002 
3003  if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3004  NewMask[i] = NewMask[i] + V1IndexOffset;
3005  } else {
3006  // Subtracting NewMask.size() is needed; otherwise the b) case above would
3007  // be <5,6,7,12> instead of <5,6,7,8>.
3008  NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3009  }
3010  }
3011 
3012  // First index must be known and non-zero. It will be used as the slidedown
3013  // amount.
3014  if (NewMask[0] <= 0)
3015  return SDValue();
3016 
3017  // NewMask must also be contiguous.
3018  for (unsigned i = 1; i != NewMask.size(); ++i)
3019  if (NewMask[i - 1] + 1 != NewMask[i])
3020  return SDValue();
3021 
3022  MVT XLenVT = Subtarget.getXLenVT();
3023  MVT SrcVT = Src.getSimpleValueType();
3024  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3025  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3026  SDValue Slidedown = DAG.getNode(
3027  RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3028  convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3029  DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3030  return DAG.getNode(
3031  ISD::EXTRACT_SUBVECTOR, DL, VT,
3032  convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3033  DAG.getConstant(0, DL, XLenVT));
3034 }
3035 
3036 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3037  const RISCVSubtarget &Subtarget) {
3038  SDValue V1 = Op.getOperand(0);
3039  SDValue V2 = Op.getOperand(1);
3040  SDLoc DL(Op);
3041  MVT XLenVT = Subtarget.getXLenVT();
3042  MVT VT = Op.getSimpleValueType();
3043  unsigned NumElts = VT.getVectorNumElements();
3044  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3045 
3046  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3047 
3048  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3049 
3050  if (SVN->isSplat()) {
3051  const int Lane = SVN->getSplatIndex();
3052  if (Lane >= 0) {
3053  MVT SVT = VT.getVectorElementType();
3054 
3055  // Turn splatted vector load into a strided load with an X0 stride.
3056  SDValue V = V1;
3057  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
3058  // with undef.
3059  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
3060  int Offset = Lane;
3061  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
3062  int OpElements =
3063  V.getOperand(0).getSimpleValueType().getVectorNumElements();
3064  V = V.getOperand(Offset / OpElements);
3065  Offset %= OpElements;
3066  }
3067 
3068  // We need to ensure the load isn't atomic or volatile.
3069  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
3070  auto *Ld = cast<LoadSDNode>(V);
3071  Offset *= SVT.getStoreSize();
3072  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
3073  TypeSize::Fixed(Offset), DL);
3074 
3075  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
3076  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
3077  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3078  SDValue IntID =
3079  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
3080  SDValue Ops[] = {Ld->getChain(),
3081  IntID,
3082  DAG.getUNDEF(ContainerVT),
3083  NewAddr,
3084  DAG.getRegister(RISCV::X0, XLenVT),
3085  VL};
3086  SDValue NewLoad = DAG.getMemIntrinsicNode(
3087  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
3088  DAG.getMachineFunction().getMachineMemOperand(
3089  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
3090  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
3091  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
3092  }
3093 
3094  // Otherwise use a scalar load and splat. This will give the best
3095  // opportunity to fold a splat into the operation. ISel can turn it into
3096  // the x0 strided load if we aren't able to fold away the select.
3097  if (SVT.isFloatingPoint())
3098  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
3099  Ld->getPointerInfo().getWithOffset(Offset),
3100  Ld->getOriginalAlign(),
3101  Ld->getMemOperand()->getFlags());
3102  else
3103  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
3104  Ld->getPointerInfo().getWithOffset(Offset), SVT,
3105  Ld->getOriginalAlign(),
3106  Ld->getMemOperand()->getFlags());
3107  DAG.makeEquivalentMemoryOrdering(Ld, V);
3108 
3109  unsigned Opc =
3110  SVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
3111  SDValue Splat =
3112  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
3113  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3114  }
3115 
3116  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
3117  assert(Lane < (int)NumElts && "Unexpected lane!");
3118  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
3119  V1, DAG.getConstant(Lane, DL, XLenVT),
3120  DAG.getUNDEF(ContainerVT), TrueMask, VL);
3121  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3122  }
3123  }
3124 
3125  ArrayRef<int> Mask = SVN->getMask();
3126 
3127  if (SDValue V =
3128  lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
3129  return V;
3130 
3131  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
3132  // be undef which can be handled with a single SLIDEDOWN/UP.
3133  int LoSrc, HiSrc;
3134  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
3135  if (Rotation > 0) {
3136  SDValue LoV, HiV;
3137  if (LoSrc >= 0) {
3138  LoV = LoSrc == 0 ? V1 : V2;
3139  LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
3140  }
3141  if (HiSrc >= 0) {
3142  HiV = HiSrc == 0 ? V1 : V2;
3143  HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
3144  }
3145 
3146  // We found a rotation. We need to slide HiV down by Rotation. Then we need
3147  // to slide LoV up by (NumElts - Rotation).
3148  unsigned InvRotate = NumElts - Rotation;
3149 
3150  SDValue Res = DAG.getUNDEF(ContainerVT);
3151  if (HiV) {
3152  // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
3153  // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
3154  // causes multiple vsetvlis in some test cases such as lowering
3155  // reduce.mul
3156  SDValue DownVL = VL;
3157  if (LoV)
3158  DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
3159  Res =
3160  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
3161  DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
3162  }
3163  if (LoV)
3164  Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
3165  DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
3166 
3167  return convertFromScalableVector(VT, Res, DAG, Subtarget);
3168  }
3169 
3170  if (SDValue V = lowerVECTOR_SHUFFLEAsVNSRL(
3171  DL, VT, ContainerVT, V1, V2, TrueMask, VL, Mask, Subtarget, DAG))
3172  return V;
3173 
3174  // Detect an interleave shuffle and lower to
3175  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
3176  bool SwapSources;
3177  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
3178  // Swap sources if needed.
3179  if (SwapSources)
3180  std::swap(V1, V2);
3181 
3182  // Extract the lower half of the vectors.
3183  MVT HalfVT = VT.getHalfNumVectorElementsVT();
3184  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
3185  DAG.getConstant(0, DL, XLenVT));
3186  V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
3187  DAG.getConstant(0, DL, XLenVT));
3188 
3189  // Double the element width and halve the number of elements in an int type.
3190  unsigned EltBits = VT.getScalarSizeInBits();
3191  MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
3192  MVT WideIntVT =
3193  MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
3194  // Convert this to a scalable vector. We need to base this on the
3195  // destination size to ensure there's always a type with a smaller LMUL.
3196  MVT WideIntContainerVT =
3197  getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
3198 
3199  // Convert sources to scalable vectors with the same element count as the
3200  // larger type.
3201  MVT HalfContainerVT = MVT::getVectorVT(
3202  VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
3203  V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
3204  V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
3205 
3206  // Cast sources to integer.
3207  MVT IntEltVT = MVT::getIntegerVT(EltBits);
3208  MVT IntHalfVT =
3209  MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
3210  V1 = DAG.getBitcast(IntHalfVT, V1);
3211  V2 = DAG.getBitcast(IntHalfVT, V2);
3212 
3213  // Freeze V2 since we use it twice and we need to be sure that the add and
3214  // multiply see the same value.
3215  V2 = DAG.getFreeze(V2);
3216 
3217  // Recreate TrueMask using the widened type's element count.
3218  TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
3219 
3220  // Widen V1 and V2 with 0s and add one copy of V2 to V1.
3221  SDValue Add =
3222  DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1, V2,
3223  DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
3224  // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
3225  SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
3226  DAG.getUNDEF(IntHalfVT),
3227  DAG.getAllOnesConstant(DL, XLenVT), VL);
3228  SDValue WidenMul =
3229  DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, V2, Multiplier,
3230  DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
3231  // Add the new copies to our previous addition giving us 2^eltbits copies of
3232  // V2. This is equivalent to shifting V2 left by eltbits. This should
3233  // combine with the vwmulu.vv above to form vwmaccu.vv.
3234  Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
3235  DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
3236  // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
3237  // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
3238  // vector VT.
3239  ContainerVT =
3240  MVT::getVectorVT(VT.getVectorElementType(),
3241  WideIntContainerVT.getVectorElementCount() * 2);
3242  Add = DAG.getBitcast(ContainerVT, Add);
3243  return convertFromScalableVector(VT, Add, DAG, Subtarget);
3244  }
3245 
3246  // Detect shuffles which can be re-expressed as vector selects; these are
3247  // shuffles in which each element in the destination is taken from an element
3248  // at the corresponding index in either source vectors.
3249  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
3250  int MaskIndex = MaskIdx.value();
3251  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
3252  });
3253 
3254  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
3255 
3256  SmallVector<SDValue> MaskVals;
3257  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
3258  // merged with a second vrgather.
3259  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
3260 
3261  // By default we preserve the original operand order, and use a mask to
3262  // select LHS as true and RHS as false. However, since RVV vector selects may
3263  // feature splats but only on the LHS, we may choose to invert our mask and
3264  // instead select between RHS and LHS.
3265  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
3266  bool InvertMask = IsSelect == SwapOps;
3267 
3268  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
3269  // half.
3270  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
3271 
3272  // Now construct the mask that will be used by the vselect or blended
3273  // vrgather operation. For vrgathers, construct the appropriate indices into
3274  // each vector.
3275  for (int MaskIndex : Mask) {
3276  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
3277  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3278  if (!IsSelect) {
3279  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
3280  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
3281  ? DAG.getConstant(MaskIndex, DL, XLenVT)
3282  : DAG.getUNDEF(XLenVT));
3283  GatherIndicesRHS.push_back(
3284  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
3285  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
3286  if (IsLHSOrUndefIndex && MaskIndex >= 0)
3287  ++LHSIndexCounts[MaskIndex];
3288  if (!IsLHSOrUndefIndex)
3289  ++RHSIndexCounts[MaskIndex - NumElts];
3290  }
3291  }
3292 
3293  if (SwapOps) {
3294  std::swap(V1, V2);
3295  std::swap(GatherIndicesLHS, GatherIndicesRHS);
3296  }
3297 
3298  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
3299  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3300  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3301 
3302  if (IsSelect)
3303  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
3304 
3305  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
3306  // On such a large vector we're unable to use i8 as the index type.
3307  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
3308  // may involve vector splitting if we're already at LMUL=8, or our
3309  // user-supplied maximum fixed-length LMUL.
3310  return SDValue();
3311  }
3312 
3313  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
3314  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
3315  MVT IndexVT = VT.changeTypeToInteger();
3316  // Since we can't introduce illegal index types at this stage, use i16 and
3317  // vrgatherei16 if the corresponding index type for plain vrgather is greater
3318  // than XLenVT.
3319  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
3320  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
3321  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
3322  }
3323 
3324  MVT IndexContainerVT =
3325  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
3326 
3327  SDValue Gather;
3328  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
3329  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
3330  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
3331  Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
3332  Subtarget);
3333  } else {
3334  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
3335  // If only one index is used, we can use a "splat" vrgather.
3336  // TODO: We can splat the most-common index and fix-up any stragglers, if
3337  // that's beneficial.
3338  if (LHSIndexCounts.size() == 1) {
3339  int SplatIndex = LHSIndexCounts.begin()->getFirst();
3340  Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
3341  DAG.getConstant(SplatIndex, DL, XLenVT),
3342  DAG.getUNDEF(ContainerVT), TrueMask, VL);
3343  } else {
3344  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
3345  LHSIndices =
3346  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
3347 
3348  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
3349  DAG.getUNDEF(ContainerVT), TrueMask, VL);
3350  }
3351  }
3352 
3353  // If a second vector operand is used by this shuffle, blend it in with an
3354  // additional vrgather.
3355  if (!V2.isUndef()) {
3356  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
3357 
3358  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3359  SelectMask =
3360  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
3361 
3362  // If only one index is used, we can use a "splat" vrgather.
3363  // TODO: We can splat the most-common index and fix-up any stragglers, if
3364  // that's beneficial.
3365  if (RHSIndexCounts.size() == 1) {
3366  int SplatIndex = RHSIndexCounts.begin()->getFirst();
3367  Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
3368  DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
3369  SelectMask, VL);
3370  } else {
3371  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
3372  RHSIndices =
3373  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
3374  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
3375  SelectMask, VL);
3376  }
3377  }
3378 
3379  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3380 }
3381 
3382 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
3383  // Support splats for any type. These should type legalize well.
3384  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
3385  return true;
3386 
3387  // Only support legal VTs for other shuffles for now.
3388  if (!isTypeLegal(VT))
3389  return false;
3390 
3391  MVT SVT = VT.getSimpleVT();
3392 
3393  bool SwapSources;
3394  int LoSrc, HiSrc;
3395  return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
3396  isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
3397 }
3398 
3399 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
3400 // the exponent.
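// For example, for an i32 element with value 8, UINT_TO_FP to f64 produces an
// exponent field of 1023 + 3; subtracting the bias (1023) gives cttz = 3, and
// subtracting the shifted exponent from 1023 + 31 gives ctlz = 28.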
3401 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
3402  MVT VT = Op.getSimpleValueType();
3403  unsigned EltSize = VT.getScalarSizeInBits();
3404  SDValue Src = Op.getOperand(0);
3405  SDLoc DL(Op);
3406 
3407  // We need a FP type that can represent the value.
3408  // TODO: Use f16 for i8 when possible?
3409  MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
3410  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
3411 
3412  // Legal types should have been checked in the RISCVTargetLowering
3413  // constructor.
3414  // TODO: Splitting may make sense in some cases.
3415  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
3416  "Expected legal float type!");
3417 
3418  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
3419  // The trailing zero count is equal to log2 of this single bit value.
3420  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
3421  SDValue Neg = DAG.getNegative(Src, DL, VT);
3422  Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
3423  }
3424 
3425  // We have a legal FP type, convert to it.
3426  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
3427  // Bitcast to integer and shift the exponent to the LSB.
3428  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
3429  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
3430  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
3431  SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
3432  DAG.getConstant(ShiftAmt, DL, IntVT));
3433  // Truncate back to original type to allow vnsrl.
3434  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
3435  // The exponent contains log2 of the value in biased form.
3436  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
3437 
3438  // For trailing zeros, we just need to subtract the bias.
3439  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
3440  return DAG.getNode(ISD::SUB, DL, VT, Trunc,
3441  DAG.getConstant(ExponentBias, DL, VT));
3442 
3443  // For leading zeros, we need to remove the bias and convert from log2 to
3444  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
3445  unsigned Adjust = ExponentBias + (EltSize - 1);
3446  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
3447 }
3448 
3449 // While RVV has alignment restrictions, we should always be able to load as a
3450 // legal equivalently-sized byte-typed vector instead. This method is
3451 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
3452 // the load is already correctly-aligned, it returns SDValue().
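// For example, an nxv2i32 load that is only byte-aligned is re-expressed as an
// nxv8i8 load of the same bytes followed by a bitcast back to nxv2i32.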
3453 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
3454  SelectionDAG &DAG) const {
3455  auto *Load = cast<LoadSDNode>(Op);
3456  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
3457 
3459  Load->getMemoryVT(),
3460  *Load->getMemOperand()))
3461  return SDValue();
3462 
3463  SDLoc DL(Op);
3464  MVT VT = Op.getSimpleValueType();
3465  unsigned EltSizeBits = VT.getScalarSizeInBits();
3466  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3467  "Unexpected unaligned RVV load type");
3468  MVT NewVT =
3469  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3470  assert(NewVT.isValid() &&
3471  "Expecting equally-sized RVV vector types to be legal");
3472  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
3473  Load->getPointerInfo(), Load->getOriginalAlign(),
3474  Load->getMemOperand()->getFlags());
3475  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
3476 }
3477 
3478 // While RVV has alignment restrictions, we should always be able to store as a
3479 // legal equivalently-sized byte-typed vector instead. This method is
3480  // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
3481 // returns SDValue() if the store is already correctly aligned.
3482 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
3483  SelectionDAG &DAG) const {
3484  auto *Store = cast<StoreSDNode>(Op);
3485  assert(Store && Store->getValue().getValueType().isVector() &&
3486  "Expected vector store");
3487 
3488  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
3489  Store->getMemoryVT(),
3490  *Store->getMemOperand()))
3491  return SDValue();
3492 
3493  SDLoc DL(Op);
3494  SDValue StoredVal = Store->getValue();
3495  MVT VT = StoredVal.getSimpleValueType();
3496  unsigned EltSizeBits = VT.getScalarSizeInBits();
3497  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3498  "Unexpected unaligned RVV store type");
3499  MVT NewVT =
3500  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3501  assert(NewVT.isValid() &&
3502  "Expecting equally-sized RVV vector types to be legal");
3503  StoredVal = DAG.getBitcast(NewVT, StoredVal);
3504  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
3505  Store->getPointerInfo(), Store->getOriginalAlign(),
3506  Store->getMemOperand()->getFlags());
3507 }
3508 
3509  static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
3510  const RISCVSubtarget &Subtarget) {
3511  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
3512 
3513  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
3514 
3515  // All simm32 constants should be handled by isel.
3516  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
3517  // this check redundant, but small immediates are common so this check
3518  // should have better compile time.
3519  if (isInt<32>(Imm))
3520  return Op;
3521 
3522  // We only need to cost the immediate, if constant pool lowering is enabled.
3523  if (!Subtarget.useConstantPoolForLargeInts())
3524  return Op;
3525 
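  // As a rough illustration: a 64-bit constant with no special structure (say
  // 0x123456789ABCDEF1) typically needs a several-instruction LUI/ADDI/SLLI
  // sequence on RV64; once that sequence is longer than getMaxBuildIntsCost(),
  // the constant is emitted to the constant pool instead (the default
  // expansion below).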
3526  RISCVMatInt::InstSeq Seq =
3527  RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
3528  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
3529  return Op;
3530 
3531  // Expand to a constant pool using the default expansion code.
3532  return SDValue();
3533 }
3534 
3535  SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
3536  SelectionDAG &DAG) const {
3537  switch (Op.getOpcode()) {
3538  default:
3539  report_fatal_error("unimplemented operand");
3540  case ISD::GlobalAddress:
3541  return lowerGlobalAddress(Op, DAG);
3542  case ISD::BlockAddress:
3543  return lowerBlockAddress(Op, DAG);
3544  case ISD::ConstantPool:
3545  return lowerConstantPool(Op, DAG);
3546  case ISD::JumpTable:
3547  return lowerJumpTable(Op, DAG);
3548  case ISD::GlobalTLSAddress:
3549  return lowerGlobalTLSAddress(Op, DAG);
3550  case ISD::Constant:
3551  return lowerConstant(Op, DAG, Subtarget);
3552  case ISD::SELECT:
3553  return lowerSELECT(Op, DAG);
3554  case ISD::BRCOND:
3555  return lowerBRCOND(Op, DAG);
3556  case ISD::VASTART:
3557  return lowerVASTART(Op, DAG);
3558  case ISD::FRAMEADDR:
3559  return lowerFRAMEADDR(Op, DAG);
3560  case ISD::RETURNADDR:
3561  return lowerRETURNADDR(Op, DAG);
3562  case ISD::SHL_PARTS:
3563  return lowerShiftLeftParts(Op, DAG);
3564  case ISD::SRA_PARTS:
3565  return lowerShiftRightParts(Op, DAG, true);
3566  case ISD::SRL_PARTS:
3567  return lowerShiftRightParts(Op, DAG, false);
3568  case ISD::BITCAST: {
3569  SDLoc DL(Op);
3570  EVT VT = Op.getValueType();
3571  SDValue Op0 = Op.getOperand(0);
3572  EVT Op0VT = Op0.getValueType();
3573  MVT XLenVT = Subtarget.getXLenVT();
3574  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
3575  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3576  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3577  return FPConv;
3578  }
3579  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3580  Subtarget.hasStdExtF()) {
3581  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3582  SDValue FPConv =
3583  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3584  return FPConv;
3585  }
3586 
3587  // Consider other scalar<->scalar casts as legal if the types are legal.
3588  // Otherwise expand them.
3589  if (!VT.isVector() && !Op0VT.isVector()) {
3590  if (isTypeLegal(VT) && isTypeLegal(Op0VT))
3591  return Op;
3592  return SDValue();
3593  }
3594 
3595  assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
3596  "Unexpected types");
3597 
3598  if (VT.isFixedLengthVector()) {
3599  // We can handle fixed length vector bitcasts with a simple replacement
3600  // in isel.
3601  if (Op0VT.isFixedLengthVector())
3602  return Op;
3603  // When bitcasting from scalar to fixed-length vector, insert the scalar
3604  // into a one-element vector whose element type matches the scalar, then
3605  // bitcast that vector to the result type.
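  // For example, an i64 -> v4i16 bitcast becomes
  // (v4i16 (bitcast (insert_vector_elt (v1i64 undef), x, 0))).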
3606  if (!Op0VT.isVector()) {
3607  EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
3608  if (!isTypeLegal(BVT))
3609  return SDValue();
3610  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
3611  DAG.getUNDEF(BVT), Op0,
3612  DAG.getConstant(0, DL, XLenVT)));
3613  }
3614  return SDValue();
3615  }
3616  // Custom-legalize bitcasts from fixed-length vector types to scalar types
3617  // thus: bitcast the vector to a one-element vector type whose element type
3618  // is the same as the result type, and extract the first element.
3619  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
3620  EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
3621  if (!isTypeLegal(BVT))
3622  return SDValue();
3623  SDValue BVec = DAG.getBitcast(BVT, Op0);
3624  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
3625  DAG.getConstant(0, DL, XLenVT));
3626  }
3627  return SDValue();
3628  }
3629  case ISD::INTRINSIC_WO_CHAIN:
3630  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3631  case ISD::INTRINSIC_W_CHAIN:
3632  return LowerINTRINSIC_W_CHAIN(Op, DAG);
3633  case ISD::INTRINSIC_VOID:
3634  return LowerINTRINSIC_VOID(Op, DAG);
3635  case ISD::BITREVERSE: {
3636  MVT VT = Op.getSimpleValueType();
3637  SDLoc DL(Op);
3638  assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
3639  assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
3640  // Expand bitreverse to a bswap(rev8) followed by brev8.
3641  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
3642  return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
3643  }
3644  case ISD::TRUNCATE:
3645  // Only custom-lower vector truncates
3646  if (!Op.getSimpleValueType().isVector())
3647  return Op;
3648  return lowerVectorTruncLike(Op, DAG);
3649  case ISD::ANY_EXTEND:
3650  case ISD::ZERO_EXTEND:
3651  if (Op.getOperand(0).getValueType().isVector() &&
3652  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3653  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3654  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3655  case ISD::SIGN_EXTEND:
3656  if (Op.getOperand(0).getValueType().isVector() &&
3657  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3658  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3659  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3660  case ISD::SPLAT_VECTOR_PARTS:
3661  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3662  case ISD::INSERT_VECTOR_ELT:
3663  return lowerINSERT_VECTOR_ELT(Op, DAG);
3664  case ISD::EXTRACT_VECTOR_ELT:
3665  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3666  case ISD::VSCALE: {
3667  MVT VT = Op.getSimpleValueType();
3668  SDLoc DL(Op);
3669  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3670  // We define our scalable vector types for lmul=1 to use a 64 bit known
3671  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3672  // vscale as VLENB / 8.
3673  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3674  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
3675  report_fatal_error("Support for VLEN==32 is incomplete.");
3676  // We assume VLENB is a multiple of 8. We manually choose the best shift
3677  // here because SimplifyDemandedBits isn't always able to simplify it.
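  // For instance, (vscale x 4) is (VLENB / 8) * 4, i.e. VLENB >> 1, and
  // (vscale x 16) is VLENB << 1, so any power-of-two multiplier folds into a
  // single shift of VLENB.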
3678  uint64_t Val = Op.getConstantOperandVal(0);
3679  if (isPowerOf2_64(Val)) {
3680  uint64_t Log2 = Log2_64(Val);
3681  if (Log2 < 3)
3682  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3683  DAG.getConstant(3 - Log2, DL, VT));
3684  if (Log2 > 3)
3685  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3686  DAG.getConstant(Log2 - 3, DL, VT));
3687  return VLENB;
3688  }
3689  // If the multiplier is a multiple of 8, scale it down to avoid needing
3690  // to shift the VLENB value.
3691  if ((Val % 8) == 0)
3692  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3693  DAG.getConstant(Val / 8, DL, VT));
3694 
3695  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3696  DAG.getConstant(3, DL, VT));
3697  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3698  }
3699  case ISD::FPOWI: {
3700  // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3701  // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3702  if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3703  Op.getOperand(1).getValueType() == MVT::i32) {
3704  SDLoc DL(Op);
3705  SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3706  SDValue Powi =
3707  DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3708  return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3709  DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3710  }
3711  return SDValue();
3712  }
3713  case ISD::FP_EXTEND:
3714  case ISD::FP_ROUND:
3715  if (!Op.getValueType().isVector())
3716  return Op;
3717  return lowerVectorFPExtendOrRoundLike(Op, DAG);
3718  case ISD::FP_TO_SINT:
3719  case ISD::FP_TO_UINT:
3720  case ISD::SINT_TO_FP:
3721  case ISD::UINT_TO_FP: {
3722  // RVV can only do fp<->int conversions to types half/double the size as
3723  // the source. We custom-lower any conversions that do two hops into
3724  // sequences.
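  // For example, a v4i8 -> v4f32 SINT_TO_FP is lowered as a sign extension to
  // v4i32 followed by a single conversion, and a v4f64 -> v4i8 FP_TO_SINT as a
  // narrowing conversion to v4i32 followed by a truncate.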
3725  MVT VT = Op.getSimpleValueType();
3726  if (!VT.isVector())
3727  return Op;
3728  SDLoc DL(Op);
3729  SDValue Src = Op.getOperand(0);
3730  MVT EltVT = VT.getVectorElementType();
3731  MVT SrcVT = Src.getSimpleValueType();
3732  MVT SrcEltVT = SrcVT.getVectorElementType();
3733  unsigned EltSize = EltVT.getSizeInBits();
3734  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3735  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3736  "Unexpected vector element types");
3737 
3738  bool IsInt2FP = SrcEltVT.isInteger();
3739  // Widening conversions
3740  if (EltSize > (2 * SrcEltSize)) {
3741  if (IsInt2FP) {
3742  // Do a regular integer sign/zero extension then convert to float.
3743  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
3744  VT.getVectorElementCount());
3745  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3746  ? ISD::ZERO_EXTEND
3747  : ISD::SIGN_EXTEND;
3748  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3749  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3750  }
3751  // FP2Int
3752  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3753  // Do one doubling fp_extend then complete the operation by converting
3754  // to int.
3755  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3756  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3757  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3758  }
3759 
3760  // Narrowing conversions
3761  if (SrcEltSize > (2 * EltSize)) {
3762  if (IsInt2FP) {
3763  // One narrowing int_to_fp, then an fp_round.
3764  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3765  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3766  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3767  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3768  }
3769  // FP2Int
3770  // One narrowing fp_to_int, then truncate the integer. If the float isn't
3771  // representable by the integer, the result is poison.
3772  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
3773  VT.getVectorElementCount());
3774  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3775  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3776  }
3777 
3778  // Scalable vectors can exit here; patterns will handle equally-sized
3779  // conversions and halving/doubling ones.
3780  if (!VT.isFixedLengthVector())
3781  return Op;
3782 
3783  // For fixed-length vectors we lower to a custom "VL" node.
3784  unsigned RVVOpc = 0;
3785  switch (Op.getOpcode()) {
3786  default:
3787  llvm_unreachable("Impossible opcode");
3788  case ISD::FP_TO_SINT:
3789  RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
3790  break;
3791  case ISD::FP_TO_UINT:
3792  RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
3793  break;
3794  case ISD::SINT_TO_FP:
3795  RVVOpc = RISCVISD::SINT_TO_FP_VL;
3796  break;
3797  case ISD::UINT_TO_FP:
3798  RVVOpc = RISCVISD::UINT_TO_FP_VL;
3799  break;
3800  }
3801 
3802  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3803  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3804  assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
3805  "Expected same element count");
3806 
3807  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3808 
3809  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3810  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3811  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3812  }
3813  case ISD::FP_TO_SINT_SAT:
3814  case ISD::FP_TO_UINT_SAT:
3815  return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3816  case ISD::FTRUNC:
3817  case ISD::FCEIL:
3818  case ISD::FFLOOR:
3819  case ISD::FRINT:
3820  case ISD::FROUND:
3821  case ISD::FROUNDEVEN:
3822  return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3823  case ISD::VECREDUCE_ADD:
3824  case ISD::VECREDUCE_UMAX:
3825  case ISD::VECREDUCE_SMAX:
3826  case ISD::VECREDUCE_UMIN:
3827  case ISD::VECREDUCE_SMIN:
3828  return lowerVECREDUCE(Op, DAG);
3829  case ISD::VECREDUCE_AND:
3830  case ISD::VECREDUCE_OR:
3831  case ISD::VECREDUCE_XOR:
3832  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3833  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3834  return lowerVECREDUCE(Op, DAG);
3835  case ISD::VECREDUCE_FADD:
3836  case ISD::VECREDUCE_SEQ_FADD:
3837  case ISD::VECREDUCE_FMIN:
3838  case ISD::VECREDUCE_FMAX:
3839  return lowerFPVECREDUCE(Op, DAG);
3840  case ISD::VP_REDUCE_ADD:
3841  case ISD::VP_REDUCE_UMAX:
3842  case ISD::VP_REDUCE_SMAX:
3843  case ISD::VP_REDUCE_UMIN:
3844  case ISD::VP_REDUCE_SMIN:
3845  case ISD::VP_REDUCE_FADD:
3846  case ISD::VP_REDUCE_SEQ_FADD:
3847  case ISD::VP_REDUCE_FMIN:
3848  case ISD::VP_REDUCE_FMAX:
3849  return lowerVPREDUCE(Op, DAG);
3850  case ISD::VP_REDUCE_AND:
3851  case ISD::VP_REDUCE_OR:
3852  case ISD::VP_REDUCE_XOR:
3853  if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3854  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3855  return lowerVPREDUCE(Op, DAG);
3856  case ISD::INSERT_SUBVECTOR:
3857  return lowerINSERT_SUBVECTOR(Op, DAG);
3858  case ISD::EXTRACT_SUBVECTOR:
3859  return lowerEXTRACT_SUBVECTOR(Op, DAG);
3860  case ISD::STEP_VECTOR:
3861  return lowerSTEP_VECTOR(Op, DAG);
3862  case ISD::VECTOR_REVERSE:
3863  return lowerVECTOR_REVERSE(Op, DAG);
3864  case ISD::VECTOR_SPLICE:
3865  return lowerVECTOR_SPLICE(Op, DAG);
3866  case ISD::BUILD_VECTOR:
3867  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3868  case ISD::SPLAT_VECTOR:
3869  if (Op.getValueType().getVectorElementType() == MVT::i1)
3870  return lowerVectorMaskSplat(Op, DAG);
3871  return SDValue();
3872  case ISD::VECTOR_SHUFFLE:
3873  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3874  case ISD::CONCAT_VECTORS: {
3875  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3876  // better than going through the stack, as the default expansion does.
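  // For example, (concat_vectors v4i32:a, v4i32:b) becomes
  // (insert_subvector (insert_subvector (v8i32 undef), a, 0), b, 4).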
3877  SDLoc DL(Op);
3878  MVT VT = Op.getSimpleValueType();
3879  unsigned NumOpElts =
3880  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3881  SDValue Vec = DAG.getUNDEF(VT);
3882  for (const auto &OpIdx : enumerate(Op->ops())) {
3883  SDValue SubVec = OpIdx.value();
3884  // Don't insert undef subvectors.
3885  if (SubVec.isUndef())
3886  continue;
3887  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3888  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3889  }
3890  return Vec;
3891  }
3892  case ISD::LOAD:
3893  if (auto V = expandUnalignedRVVLoad(Op, DAG))
3894  return V;
3895  if (Op.getValueType().isFixedLengthVector())
3896  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3897  return Op;
3898  case ISD::STORE:
3899  if (auto V = expandUnalignedRVVStore(Op, DAG))
3900  return V;
3901  if (Op.getOperand(1).getValueType().isFixedLengthVector())
3902  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3903  return Op;
3904  case ISD::MLOAD:
3905  case ISD::VP_LOAD:
3906  return lowerMaskedLoad(Op, DAG);
3907  case ISD::MSTORE:
3908  case ISD::VP_STORE:
3909  return lowerMaskedStore(Op, DAG);
3910  case ISD::SELECT_CC: {
3911  // This occurs because we custom legalize SETGT and SETUGT for setcc. That
3912  // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
3913  // into a separate SETCC and SELECT, just like LegalizeDAG does.
3914  SDValue Tmp1 = Op.getOperand(0);
3915  SDValue Tmp2 = Op.getOperand(1);
3916  SDValue True = Op.getOperand(2);
3917  SDValue False = Op.getOperand(3);
3918  EVT VT = Op.getValueType();
3919  SDValue CC = Op.getOperand(4);
3920  EVT CmpVT = Tmp1.getValueType();
3921  EVT CCVT =
3922  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
3923  SDLoc DL(Op);
3924  SDValue Cond =
3925  DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
3926  return DAG.getSelect(DL, VT, Cond, True, False);
3927  }
3928  case ISD::SETCC: {
3929  MVT OpVT = Op.getOperand(0).getSimpleValueType();
3930  if (OpVT.isScalarInteger()) {
3931  MVT VT = Op.getSimpleValueType();
3932  SDValue LHS = Op.getOperand(0);
3933  SDValue RHS = Op.getOperand(1);
3934  ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3935  assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
3936  "Unexpected CondCode");
3937 
3938  SDLoc DL(Op);
3939 
3940  // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
3941  // convert this to the equivalent of (set(u)ge X, C+1) by using
3942  // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
3943  // in a register.
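  // For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
  // (setugt X, 5) becomes (xori (sltiu X, 6), 1).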
3944  if (isa<ConstantSDNode>(RHS)) {
3945  int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
3946  if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
3947  // X > -1 should have been replaced with false.
3948  assert((CCVal != ISD::SETUGT || Imm != -1) &&
3949  "Missing canonicalization");
3950  // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
3951  CCVal = ISD::getSetCCSwappedOperands(CCVal);
3952  SDValue SetCC = DAG.getSetCC(
3953  DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
3954  return DAG.getLogicalNOT(DL, SetCC, VT);
3955  }
3956  }
3957 
3958  // Not a constant we could handle, swap the operands and condition code to
3959  // SETLT/SETULT.
3960  CCVal = ISD::getSetCCSwappedOperands(CCVal);
3961  return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
3962  }
3963 
3964  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3965  }
3966  case ISD::ADD:
3967  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
3968  case ISD::SUB:
3969  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
3970  case ISD::MUL:
3971  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
3972  case ISD::MULHS:
3973  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL, /*HasMergeOp*/ true);
3974  case ISD::MULHU:
3975  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL, /*HasMergeOp*/ true);
3976  case ISD::AND:
3977  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3978  RISCVISD::AND_VL);
3979  case ISD::OR:
3980  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3981  RISCVISD::OR_VL);
3982  case ISD::XOR:
3983  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3984  RISCVISD::XOR_VL);
3985  case ISD::SDIV:
3986  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
3987  case ISD::SREM:
3988  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
3989  case ISD::UDIV:
3990  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
3991  case ISD::UREM:
3992  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
3993  case ISD::SHL:
3994  case ISD::SRA:
3995  case ISD::SRL:
3996  if (Op.getSimpleValueType().isFixedLengthVector())
3997  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3998  // This can be called for an i32 shift amount that needs to be promoted.
3999  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
4000  "Unexpected custom legalisation");
4001  return SDValue();
4002  case ISD::SADDSAT:
4003  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL,
4004  /*HasMergeOp*/ true);
4005  case ISD::UADDSAT:
4006  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL,
4007  /*HasMergeOp*/ true);
4008  case ISD::SSUBSAT:
4009  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL,
4010  /*HasMergeOp*/ true);
4011  case ISD::USUBSAT:
4012  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL,
4013  /*HasMergeOp*/ true);
4014  case ISD::FADD:
4015  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
4016  case ISD::FSUB:
4017  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
4018  case ISD::FMUL:
4019  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
4020  case ISD::FDIV:
4021  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
4022  case ISD::FNEG:
4023  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
4024  case ISD::FABS:
4025  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
4026  case ISD::FSQRT:
4027  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
4028  case ISD::FMA:
4029  return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
4030  case ISD::SMIN:
4031  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
4032  case ISD::SMAX:
4033  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
4034  case ISD::UMIN:
4035  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
4036  case ISD::UMAX:
4037  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
4038  case ISD::FMINNUM:
4039  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL,
4040  /*HasMergeOp*/ true);
4041  case ISD::FMAXNUM:
4042  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL,
4043  /*HasMergeOp*/ true);
4044  case ISD::ABS:
4045  return lowerABS(Op, DAG);
4046  case ISD::CTLZ_ZERO_UNDEF:
4047  case ISD::CTTZ_ZERO_UNDEF:
4048  return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
4049  case ISD::VSELECT:
4050  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
4051  case ISD::FCOPYSIGN:
4052  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
4053  case ISD::MGATHER:
4054  case ISD::VP_GATHER:
4055  return lowerMaskedGather(Op, DAG);
4056  case ISD::MSCATTER:
4057  case ISD::VP_SCATTER:
4058  return lowerMaskedScatter(Op, DAG);
4059  case ISD::FLT_ROUNDS_:
4060  return lowerGET_ROUNDING(Op, DAG);
4061  case ISD::SET_ROUNDING:
4062  return lowerSET_ROUNDING(Op, DAG);
4063  case ISD::EH_DWARF_CFA:
4064  return lowerEH_DWARF_CFA(Op, DAG);
4065  case ISD::VP_SELECT:
4066  return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
4067  case ISD::VP_MERGE:
4068  return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
4069  case ISD::VP_ADD:
4070  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
4071  case ISD::VP_SUB:
4072  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
4073  case ISD::VP_MUL:
4074  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
4075  case ISD::VP_SDIV:
4076  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
4077  case ISD::VP_UDIV:
4078  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
4079  case ISD::VP_SREM:
4080  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
4081  case ISD::VP_UREM:
4082  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
4083  case ISD::VP_AND:
4084  return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
4085  case ISD::VP_OR:
4086  return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
4087  case ISD::VP_XOR:
4088  return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
4089  case ISD::VP_ASHR:
4090  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
4091  case ISD::VP_LSHR:
4092  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
4093  case ISD::VP_SHL:
4094  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
4095  case ISD::VP_FADD:
4096  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
4097  case ISD::VP_FSUB:
4098  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
4099  case ISD::VP_FMUL:
4100  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
4101  case ISD::VP_FDIV:
4102  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
4103  case ISD::VP_FNEG:
4104  return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
4105  case ISD::VP_FABS:
4106  return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
4107  case ISD::VP_SQRT:
4108  return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
4109  case ISD::VP_FMA:
4110  return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
4111  case ISD::VP_FMINNUM:
4112  return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
4113  case ISD::VP_FMAXNUM:
4114  return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
4115  case ISD::VP_FCOPYSIGN:
4116  return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
4117  case ISD::VP_SIGN_EXTEND:
4118  case ISD::VP_ZERO_EXTEND:
4119  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
4120  return lowerVPExtMaskOp(Op, DAG);
4121  return lowerVPOp(Op, DAG,
4122  Op.getOpcode() == ISD::VP_SIGN_EXTEND
4123  ? RISCVISD::VSEXT_VL
4124  : RISCVISD::VZEXT_VL);
4125  case ISD::VP_TRUNCATE:
4126  return lowerVectorTruncLike(Op, DAG);
4127  case ISD::VP_FP_EXTEND:
4128  case ISD::VP_FP_ROUND:
4129  return lowerVectorFPExtendOrRoundLike(Op, DAG);
4130  case ISD::VP_FP_TO_SINT:
4131  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
4132  case ISD::VP_FP_TO_UINT:
4133  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
4134  case ISD::VP_SINT_TO_FP:
4135  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
4136  case ISD::VP_UINT_TO_FP:
4137  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
4138  case ISD::VP_SETCC:
4139  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
4140  return lowerVPSetCCMaskOp(Op, DAG);
4141  return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
4142  case ISD::VP_SMIN:
4143  return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
4144  case ISD::VP_SMAX:
4145  return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
4146  case ISD::VP_UMIN:
4147  return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
4148  case ISD::VP_UMAX:
4149  return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
4150  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
4151  return lowerVPStridedLoad(Op, DAG);
4152  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
4153  return lowerVPStridedStore(Op, DAG);
4154  case ISD::VP_FCEIL:
4155  case ISD::VP_FFLOOR:
4156  case ISD::VP_FRINT:
4157  case ISD::VP_FNEARBYINT:
4158  case ISD::VP_FROUND:
4159  case ISD::VP_FROUNDEVEN:
4160  case ISD::VP_FROUNDTOZERO:
4161  return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
4162  }
4163 }
4164 
4165  static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
4166  SelectionDAG &DAG, unsigned Flags) {
4167  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4168 }
4169 
4170  static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
4171  SelectionDAG &DAG, unsigned Flags) {
4172  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4173  Flags);
4174 }
4175 
4176  static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
4177  SelectionDAG &DAG, unsigned Flags) {
4178  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4179  N->getOffset(), Flags);
4180 }
4181 
4182  static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
4183  SelectionDAG &DAG, unsigned Flags) {
4184  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4185 }
4186 
4187 template <class NodeTy>
4188 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4189  bool IsLocal) const {
4190  SDLoc DL(N);
4191  EVT Ty = getPointerTy(DAG.getDataLayout());
4192 
4193  if (isPositionIndependent()) {
4194  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4195  if (IsLocal)
4196  // Use PC-relative addressing to access the symbol. This generates the
4197  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
4198  // %pcrel_lo(auipc)).
4199  return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
4200 
4201  // Use PC-relative addressing to access the GOT for this symbol, then load
4202  // the address from the GOT. This generates the pattern (PseudoLA sym),
4203  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
4204  MachineFunction &MF = DAG.getMachineFunction();
4205  MachineMemOperand *MemOp = MF.getMachineMemOperand(
4206  MachinePointerInfo::getGOT(MF),
4207  MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
4208  MachineMemOperand::MOInvariant,
4209  LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4210  SDValue Load =
4211  DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
4212  {DAG.getEntryNode(), Addr}, Ty, MemOp);
4213  return Load;
4214  }
4215 
4216  switch (getTargetMachine().getCodeModel()) {
4217  default:
4218  report_fatal_error("Unsupported code model for lowering");
4219  case CodeModel::Small: {
4220  // Generate a sequence for accessing addresses within the first 2 GiB of
4221  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
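  // In assembly this is roughly "lui rd, %hi(sym); addi rd, rd, %lo(sym)".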
4222  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
4223  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
4224  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
4225  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
4226  }
4227  case CodeModel::Medium: {
4228  // Generate a sequence for accessing addresses within any 2GiB range within
4229  // the address space. This generates the pattern (PseudoLLA sym), which
4230  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
4231  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4232  return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
4233  }
4234  }
4235 }
4236 
4237 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
4238  SelectionDAG &DAG) const {
4239  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4240  assert(N->getOffset() == 0 && "unexpected offset in global node");
4241  return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
4242 }
4243 
4244 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
4245  SelectionDAG &DAG) const {
4246  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
4247 
4248  return getAddr(N, DAG);
4249 }
4250 
4251 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
4252  SelectionDAG &DAG) const {
4253  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
4254 
4255  return getAddr(N, DAG);
4256 }
4257 
4258 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
4259  SelectionDAG &DAG) const {
4260  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);