1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
30 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/IntrinsicsRISCV.h"
34 #include "llvm/IR/PatternMatch.h"
35 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/KnownBits.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "riscv-lower"
44 
45 STATISTIC(NumTailCalls, "Number of tail calls");
46 
47 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
48  const RISCVSubtarget &STI)
49  : TargetLowering(TM), Subtarget(STI) {
50 
51  if (Subtarget.isRV32E())
52  report_fatal_error("Codegen not yet implemented for RV32E");
53 
54  RISCVABI::ABI ABI = Subtarget.getTargetABI();
55  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
56 
57  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
58  !Subtarget.hasStdExtF()) {
59  errs() << "Hard-float 'f' ABI can't be used for a target that "
60  "doesn't support the F instruction set extension (ignoring "
61  "target-abi)\n";
63  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
64  !Subtarget.hasStdExtD()) {
65  errs() << "Hard-float 'd' ABI can't be used for a target that "
66  "doesn't support the D instruction set extension (ignoring "
67  "target-abi)\n";
69  }
70 
71  switch (ABI) {
72  default:
73  report_fatal_error("Don't know how to lower this ABI");
77  case RISCVABI::ABI_LP64:
80  break;
81  }
82 
83  MVT XLenVT = Subtarget.getXLenVT();
84 
85  // Set up the register classes.
86  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
87 
88  if (Subtarget.hasStdExtZfh())
89  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
90  if (Subtarget.hasStdExtF())
91  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
92  if (Subtarget.hasStdExtD())
93  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
94 
95  static const MVT::SimpleValueType BoolVecVTs[] = {
98  static const MVT::SimpleValueType IntVecVTs[] = {
104  static const MVT::SimpleValueType F16VecVTs[] = {
107  static const MVT::SimpleValueType F32VecVTs[] = {
109  static const MVT::SimpleValueType F64VecVTs[] = {
111 
112  if (Subtarget.hasStdExtV()) {
113  auto addRegClassForRVV = [this](MVT VT) {
114  unsigned Size = VT.getSizeInBits().getKnownMinValue();
115  assert(Size <= 512 && isPowerOf2_32(Size));
116  const TargetRegisterClass *RC;
117  if (Size <= 64)
118  RC = &RISCV::VRRegClass;
119  else if (Size == 128)
120  RC = &RISCV::VRM2RegClass;
121  else if (Size == 256)
122  RC = &RISCV::VRM4RegClass;
123  else
124  RC = &RISCV::VRM8RegClass;
125 
126  addRegisterClass(VT, RC);
127  };
128 
129  for (MVT VT : BoolVecVTs)
130  addRegClassForRVV(VT);
131  for (MVT VT : IntVecVTs)
132  addRegClassForRVV(VT);
133 
134  if (Subtarget.hasStdExtZfh())
135  for (MVT VT : F16VecVTs)
136  addRegClassForRVV(VT);
137 
138  if (Subtarget.hasStdExtF())
139  for (MVT VT : F32VecVTs)
140  addRegClassForRVV(VT);
141 
142  if (Subtarget.hasStdExtD())
143  for (MVT VT : F64VecVTs)
144  addRegClassForRVV(VT);
145 
146  if (Subtarget.useRVVForFixedLengthVectors()) {
147  auto addRegClassForFixedVectors = [this](MVT VT) {
148  MVT ContainerVT = getContainerForFixedLengthVector(VT);
149  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
150  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
151  addRegisterClass(VT, TRI.getRegClass(RCID));
152  };
154  if (useRVVForFixedLengthVectorVT(VT))
155  addRegClassForFixedVectors(VT);
156 
158  if (useRVVForFixedLengthVectorVT(VT))
159  addRegClassForFixedVectors(VT);
160  }
161  }
162 
163  // Compute derived properties from the register classes.
165 
167 
168  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
169  setLoadExtAction(N, XLenVT, MVT::i1, Promote);
170 
171  // TODO: add all necessary setOperationAction calls.
173 
178 
181 
186 
188  if (!Subtarget.hasStdExtZbb()) {
191  }
192 
193  if (Subtarget.is64Bit()) {
199 
204  } else {
205  setLibcallName(RTLIB::SHL_I128, nullptr);
206  setLibcallName(RTLIB::SRL_I128, nullptr);
207  setLibcallName(RTLIB::SRA_I128, nullptr);
208  setLibcallName(RTLIB::MUL_I128, nullptr);
209  setLibcallName(RTLIB::MULO_I64, nullptr);
210  }
211 
212  if (!Subtarget.hasStdExtM()) {
220  } else {
221  if (Subtarget.is64Bit()) {
224 
234  } else {
236  }
237  }
238 
243 
247 
248  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
249  if (Subtarget.is64Bit()) {
252  }
253  } else {
256  }
257 
258  if (Subtarget.hasStdExtZbp()) {
259  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
260  // more combining.
264  // BSWAP i8 doesn't exist.
267 
268  if (Subtarget.is64Bit()) {
271  }
272  } else {
273  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
274  // pattern match it directly in isel.
276  Subtarget.hasStdExtZbb() ? Legal : Expand);
277  }
278 
279  if (Subtarget.hasStdExtZbb()) {
284 
285  if (Subtarget.is64Bit()) {
290  }
291  } else {
295  }
296 
297  if (Subtarget.hasStdExtZbt()) {
301 
302  if (Subtarget.is64Bit()) {
305  }
306  } else {
308  }
309 
310  static const ISD::CondCode FPCCToExpand[] = {
314 
315  static const ISD::NodeType FPOpToExpand[] = {
318 
319  if (Subtarget.hasStdExtZfh())
321 
322  if (Subtarget.hasStdExtZfh()) {
329  for (auto CC : FPCCToExpand)
334  for (auto Op : FPOpToExpand)
336  }
337 
338  if (Subtarget.hasStdExtF()) {
345  for (auto CC : FPCCToExpand)
350  for (auto Op : FPOpToExpand)
354  }
355 
356  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
358 
359  if (Subtarget.hasStdExtD()) {
366  for (auto CC : FPCCToExpand)
373  for (auto Op : FPOpToExpand)
377  }
378 
379  if (Subtarget.is64Bit()) {
384  }
385 
386  if (Subtarget.hasStdExtF()) {
389 
392  }
393 
398 
400 
401  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
402  // Unfortunately this can't be determined just from the ISA naming string.
404  Subtarget.is64Bit() ? Legal : Custom);
405 
409  if (Subtarget.is64Bit())
411 
412  if (Subtarget.hasStdExtA()) {
415  } else {
417  }
418 
420 
421  if (Subtarget.hasStdExtV()) {
423 
425 
426  // RVV intrinsics may have illegal operands.
427  // We also need to custom legalize vmv.x.s.
432  if (Subtarget.is64Bit()) {
434  } else {
437  }
438 
441 
442  static const unsigned IntegerVPOps[] = {
443  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
444  ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
445  ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
446  ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
447  ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
448  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
449  ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
450 
451  static const unsigned FloatingPointVPOps[] = {
452  ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
453  ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
454  ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
455 
456  if (!Subtarget.is64Bit()) {
457  // We must custom-lower certain vXi64 operations on RV32 due to the vector
458  // element type being illegal.
461 
470 
471  setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
472  setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
473  setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
474  setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
475  setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
476  setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
477  setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
478  setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
479  }
480 
481  for (MVT VT : BoolVecVTs) {
483 
484  // Mask VTs are custom-expanded into a series of standard nodes
489 
492 
496 
500 
501  setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
502  setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
503  setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
504 
505  // RVV has native int->float & float->int conversions where the
506  // element type sizes are within one power-of-two of each other. Any
507  // wider distances between type sizes have to be lowered as sequences
508  // which progressively narrow the gap in stages.
513 
514  // Expand all extending loads to types larger than this, and truncating
515  // stores from types larger than this.
516  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
517  setTruncStoreAction(OtherVT, VT, Expand);
518  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
519  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
520  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
521  }
522  }
523 
524  for (MVT VT : IntVecVTs) {
527 
532 
535 
536  // Custom-lower extensions and truncations from/to mask types.
540 
541  // RVV has native int->float & float->int conversions where the
542  // element type sizes are within one power-of-two of each other. Any
543  // wider distances between type sizes have to be lowered as sequences
544  // which progressively narrow the gap in stages.
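  // For example (illustrative, not an exhaustive description): an i8 -> f64
  // conversion spans more than one power of two, so a sint_to_fp from i8
  // elements to f64 elements would be staged roughly as i8 -> i32
  // (sign-extension) followed by a widening i32 -> f64 convert, rather than
  // being emitted as a single instruction.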
549 
554 
555  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
556  // nodes which truncate by one power of two at a time.
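  // For example (sketch): truncating i32 elements down to i8 elements is
  // emitted as two TRUNCATE_VECTOR_VL steps, i32 -> i16 -> i8, each of which
  // corresponds to a narrowing shift (vnsrl with a zero shift amount) in the
  // vector ISA.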
558 
559  // Custom-lower insert/extract operations to simplify patterns.
562 
563  // Custom-lower reduction operations to set up the corresponding custom
564  // nodes' operands.
573 
574  for (unsigned VPOpc : IntegerVPOps)
575  setOperationAction(VPOpc, VT, Custom);
576 
579 
584 
585  setOperationAction(ISD::VP_LOAD, VT, Custom);
586  setOperationAction(ISD::VP_STORE, VT, Custom);
587  setOperationAction(ISD::VP_GATHER, VT, Custom);
588  setOperationAction(ISD::VP_SCATTER, VT, Custom);
589 
593 
596 
599 
600  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
601  setTruncStoreAction(VT, OtherVT, Expand);
602  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
603  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
604  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
605  }
606  }
607 
608  // Expand various CCs to best match the RVV ISA, which natively supports UNE
609  // but no other unordered comparisons, and supports all ordered comparisons
610  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
611  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
612  // and we pattern-match those back to the "original", swapping operands once
613  // more. This way we catch both operations and both "vf" and "fv" forms with
614  // fewer patterns.
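  // For example (illustrative): (setcc x, y, setogt) is expanded to
  // (setcc y, x, setolt), and isel then matches that swapped form to both the
  // "vf" (e.g. vmflt.vf) and "fv" variants, so a single expansion covers both
  // operand orders.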
615  static const ISD::CondCode VFPCCToExpand[] = {
619  };
620 
621  // Sets common operation actions on RVV floating-point vector types.
622  const auto SetCommonVFPActions = [&](MVT VT) {
624  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
625  // sizes are within one power-of-two of each other. Therefore conversions
626  // between vXf16 and vXf64 must be lowered as sequences which convert via
627  // vXf32.
630  // Custom-lower insert/extract operations to simplify patterns.
633  // Expand various condition codes (explained above).
634  for (auto CC : VFPCCToExpand)
635  setCondCodeAction(CC, VT, Expand);
636 
639 
644 
646 
649 
654 
655  setOperationAction(ISD::VP_LOAD, VT, Custom);
656  setOperationAction(ISD::VP_STORE, VT, Custom);
657  setOperationAction(ISD::VP_GATHER, VT, Custom);
658  setOperationAction(ISD::VP_SCATTER, VT, Custom);
659 
662 
666 
668 
669  for (unsigned VPOpc : FloatingPointVPOps)
670  setOperationAction(VPOpc, VT, Custom);
671  };
672 
673  // Sets common extload/truncstore actions on RVV floating-point vector
674  // types.
675  const auto SetCommonVFPExtLoadTruncStoreActions =
676  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
677  for (auto SmallVT : SmallerVTs) {
678  setTruncStoreAction(VT, SmallVT, Expand);
679  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
680  }
681  };
682 
683  if (Subtarget.hasStdExtZfh())
684  for (MVT VT : F16VecVTs)
685  SetCommonVFPActions(VT);
686 
687  for (MVT VT : F32VecVTs) {
688  if (Subtarget.hasStdExtF())
689  SetCommonVFPActions(VT);
690  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
691  }
692 
693  for (MVT VT : F64VecVTs) {
694  if (Subtarget.hasStdExtD())
695  SetCommonVFPActions(VT);
696  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
697  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
698  }
699 
700  if (Subtarget.useRVVForFixedLengthVectors()) {
702  if (!useRVVForFixedLengthVectorVT(VT))
703  continue;
704 
705  // By default everything must be expanded.
706  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
708  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
709  setTruncStoreAction(VT, OtherVT, Expand);
710  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
711  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
712  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
713  }
714 
715  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
718 
721 
724 
727 
729 
731 
733 
735 
739 
740  setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
741  setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
742  setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
743 
748 
749  // Operations below are different between masks and other vectors.
750  if (VT.getVectorElementType() == MVT::i1) {
754  continue;
755  }
756 
757  // Use SPLAT_VECTOR to prevent type legalization from destroying the
758  // splats when type legalizing i64 scalar on RV32.
759  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
760  // improvements first.
761  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
764  }
765 
768 
773 
774  setOperationAction(ISD::VP_LOAD, VT, Custom);
775  setOperationAction(ISD::VP_STORE, VT, Custom);
776  setOperationAction(ISD::VP_GATHER, VT, Custom);
777  setOperationAction(ISD::VP_SCATTER, VT, Custom);
778 
792 
798 
801 
806 
809 
813 
814  // Custom-lower reduction operations to set up the corresponding custom
815  // nodes' operands.
821 
822  for (unsigned VPOpc : IntegerVPOps)
823  setOperationAction(VPOpc, VT, Custom);
824  }
825 
827  if (!useRVVForFixedLengthVectorVT(VT))
828  continue;
829 
830  // By default everything must be expanded.
831  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
833  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
834  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
835  setTruncStoreAction(VT, OtherVT, Expand);
836  }
837 
838  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
841 
847 
854 
855  setOperationAction(ISD::VP_LOAD, VT, Custom);
856  setOperationAction(ISD::VP_STORE, VT, Custom);
857  setOperationAction(ISD::VP_GATHER, VT, Custom);
858  setOperationAction(ISD::VP_SCATTER, VT, Custom);
859 
871 
874 
875  for (auto CC : VFPCCToExpand)
876  setCondCodeAction(CC, VT, Expand);
877 
881 
883 
888 
889  for (unsigned VPOpc : FloatingPointVPOps)
890  setOperationAction(VPOpc, VT, Custom);
891  }
892 
893  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
901  }
902  }
903 
904  // Function alignments.
905  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
906  setMinFunctionAlignment(FunctionAlignment);
907  setPrefFunctionAlignment(FunctionAlignment);
908 
910 
911  // Jumps are expensive, compared to logic
913 
914  // We can use any register for comparisons
916 
924  if (Subtarget.hasStdExtV()) {
928  setTargetDAGCombine(ISD::VP_GATHER);
929  setTargetDAGCombine(ISD::VP_SCATTER);
934  }
935 }
936 
939  EVT VT) const {
940  if (!VT.isVector())
941  return getPointerTy(DL);
942  if (Subtarget.hasStdExtV() &&
943  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
946 }
947 
948 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
949  return Subtarget.getXLenVT();
950 }
951 
953  const CallInst &I,
954  MachineFunction &MF,
955  unsigned Intrinsic) const {
956  auto &DL = I.getModule()->getDataLayout();
957  switch (Intrinsic) {
958  default:
959  return false;
960  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
961  case Intrinsic::riscv_masked_atomicrmw_add_i32:
962  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
963  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
964  case Intrinsic::riscv_masked_atomicrmw_max_i32:
965  case Intrinsic::riscv_masked_atomicrmw_min_i32:
966  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
967  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
968  case Intrinsic::riscv_masked_cmpxchg_i32: {
969  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
971  Info.memVT = MVT::getVT(PtrTy->getElementType());
972  Info.ptrVal = I.getArgOperand(0);
973  Info.offset = 0;
974  Info.align = Align(4);
977  return true;
978  }
979  case Intrinsic::riscv_masked_strided_load:
981  Info.ptrVal = I.getArgOperand(1);
982  Info.memVT = getValueType(DL, I.getType()->getScalarType());
983  Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
986  return true;
987  case Intrinsic::riscv_masked_strided_store:
989  Info.ptrVal = I.getArgOperand(1);
990  Info.memVT =
991  getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
992  Info.align = Align(
993  DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
994  8);
997  return true;
998  }
999 }
1000 
1002  const AddrMode &AM, Type *Ty,
1003  unsigned AS,
1004  Instruction *I) const {
1005  // No global is ever allowed as a base.
1006  if (AM.BaseGV)
1007  return false;
1008 
1009  // Require a 12-bit signed offset.
1010  if (!isInt<12>(AM.BaseOffs))
1011  return false;
1012 
1013  switch (AM.Scale) {
1014  case 0: // "r+i" or just "i", depending on HasBaseReg.
1015  break;
1016  case 1:
1017  if (!AM.HasBaseReg) // allow "r+i".
1018  break;
1019  return false; // disallow "r+r" or "r+r+i".
1020  default:
1021  return false;
1022  }
1023 
1024  return true;
1025 }
1026 
1028  return isInt<12>(Imm);
1029 }
1030 
1032  return isInt<12>(Imm);
1033 }
1034 
1035 // On RV32, 64-bit integers are split into their high and low parts and held
1036 // in two different registers, so the trunc is free since the low register can
1037 // just be used.
1038 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1039  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1040  return false;
1041  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1042  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1043  return (SrcBits == 64 && DestBits == 32);
1044 }
1045 
1046 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1047  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1048  !SrcVT.isInteger() || !DstVT.isInteger())
1049  return false;
1050  unsigned SrcBits = SrcVT.getSizeInBits();
1051  unsigned DestBits = DstVT.getSizeInBits();
1052  return (SrcBits == 64 && DestBits == 32);
1053 }
1054 
1056  // Zexts are free if they can be combined with a load.
1057  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1058  EVT MemVT = LD->getMemoryVT();
1059  if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
1060  (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
1061  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1062  LD->getExtensionType() == ISD::ZEXTLOAD))
1063  return true;
1064  }
1065 
1066  return TargetLowering::isZExtFree(Val, VT2);
1067 }
1068 
1070  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1071 }
1072 
1074  return Subtarget.hasStdExtZbb();
1075 }
1076 
1078  return Subtarget.hasStdExtZbb();
1079 }
1080 
1081 /// Check if sinking \p I's operands to I's basic block is profitable, because
1082 /// the operands can be folded into a target instruction, e.g.
1083 /// splats of scalars can fold into vector instructions.
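/// For example (illustrative IR): given
///   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
///   %add   = add <4 x i32> %v, %splat
/// sinking the splat next to the add lets instruction selection fold it into
/// a vadd.vx, keeping %x in a scalar register instead of materializing a full
/// vector splat.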
1085  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1086  using namespace llvm::PatternMatch;
1087 
1088  if (!I->getType()->isVectorTy() || !Subtarget.hasStdExtV())
1089  return false;
1090 
1091  auto IsSinker = [&](Instruction *I, int Operand) {
1092  switch (I->getOpcode()) {
1093  case Instruction::Add:
1094  case Instruction::Sub:
1095  case Instruction::Mul:
1096  case Instruction::And:
1097  case Instruction::Or:
1098  case Instruction::Xor:
1099  case Instruction::FAdd:
1100  case Instruction::FSub:
1101  case Instruction::FMul:
1102  case Instruction::FDiv:
1103  return true;
1104  case Instruction::Shl:
1105  case Instruction::LShr:
1106  case Instruction::AShr:
1107  return Operand == 1;
1108  case Instruction::Call:
1109  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1110  switch (II->getIntrinsicID()) {
1111  case Intrinsic::fma:
1112  return Operand == 0 || Operand == 1;
1113  default:
1114  return false;
1115  }
1116  }
1117  return false;
1118  default:
1119  return false;
1120  }
1121  };
1122 
1123  for (auto OpIdx : enumerate(I->operands())) {
1124  if (!IsSinker(I, OpIdx.index()))
1125  continue;
1126 
1127  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1128  // Make sure we are not already sinking this operand
1129  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1130  continue;
1131 
1132  // We are looking for a splat that can be sunk.
1134  m_Undef(), m_ZeroMask())))
1135  continue;
1136 
1137  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1138  // and vector registers
1139  for (Use &U : Op->uses()) {
1140  Instruction *Insn = cast<Instruction>(U.getUser());
1141  if (!IsSinker(Insn, U.getOperandNo()))
1142  return false;
1143  }
1144 
1145  Ops.push_back(&Op->getOperandUse(0));
1146  Ops.push_back(&OpIdx.value());
1147  }
1148  return true;
1149 }
1150 
1152  bool ForCodeSize) const {
1153  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1154  return false;
1155  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1156  return false;
1157  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1158  return false;
1159  if (Imm.isNegZero())
1160  return false;
1161  return Imm.isZero();
1162 }
1163 
1165  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1166  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1167  (VT == MVT::f64 && Subtarget.hasStdExtD());
1168 }
1169 
1171  CallingConv::ID CC,
1172  EVT VT) const {
1173  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1174  // end up using a GPR but that will be decided based on ABI.
1175  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1176  return MVT::f32;
1177 
1179 }
1180 
1182  CallingConv::ID CC,
1183  EVT VT) const {
1184  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1185  // end up using a GPR but that will be decided based on ABI.
1186  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1187  return 1;
1188 
1190 }
1191 
1192 // Changes the condition code and swaps operands if necessary, so the SetCC
1193 // operation matches one of the comparisons supported directly by branches
1194 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1195 // with 1/-1.
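// For example (illustrative): (setgt X, -1) is rewritten as (setge X, 0),
// which a conditional branch can implement directly as "bge X, zero";
// likewise (setlt X, 1) becomes (setge 0, X), i.e. "bge zero, X".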
1196 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1197  ISD::CondCode &CC, SelectionDAG &DAG) {
1198  // Convert X > -1 to X >= 0.
1199  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1200  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1201  CC = ISD::SETGE;
1202  return;
1203  }
1204  // Convert X < 1 to 0 >= X.
1205  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1206  RHS = LHS;
1207  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1208  CC = ISD::SETGE;
1209  return;
1210  }
1211 
1212  switch (CC) {
1213  default:
1214  break;
1215  case ISD::SETGT:
1216  case ISD::SETLE:
1217  case ISD::SETUGT:
1218  case ISD::SETULE:
1220  std::swap(LHS, RHS);
1221  break;
1222  }
1223 }
1224 
1226  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1227  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1228  if (VT.getVectorElementType() == MVT::i1)
1229  KnownSize *= 8;
1230 
1231  switch (KnownSize) {
1232  default:
1233  llvm_unreachable("Invalid LMUL.");
1234  case 8:
1235  return RISCVII::VLMUL::LMUL_F8;
1236  case 16:
1237  return RISCVII::VLMUL::LMUL_F4;
1238  case 32:
1239  return RISCVII::VLMUL::LMUL_F2;
1240  case 64:
1241  return RISCVII::VLMUL::LMUL_1;
1242  case 128:
1243  return RISCVII::VLMUL::LMUL_2;
1244  case 256:
1245  return RISCVII::VLMUL::LMUL_4;
1246  case 512:
1247  return RISCVII::VLMUL::LMUL_8;
1248  }
1249 }
1250 
1252  switch (LMul) {
1253  default:
1254  llvm_unreachable("Invalid LMUL.");
1259  return RISCV::VRRegClassID;
1261  return RISCV::VRM2RegClassID;
1263  return RISCV::VRM4RegClassID;
1265  return RISCV::VRM8RegClassID;
1266  }
1267 }
1268 
1270  RISCVII::VLMUL LMUL = getLMUL(VT);
1271  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1275  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1276  "Unexpected subreg numbering");
1277  return RISCV::sub_vrm1_0 + Index;
1278  }
1279  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1280  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1281  "Unexpected subreg numbering");
1282  return RISCV::sub_vrm2_0 + Index;
1283  }
1284  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1285  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1286  "Unexpected subreg numbering");
1287  return RISCV::sub_vrm4_0 + Index;
1288  }
1289  llvm_unreachable("Invalid vector type.");
1290 }
1291 
1293  if (VT.getVectorElementType() == MVT::i1)
1294  return RISCV::VRRegClassID;
1295  return getRegClassIDForLMUL(getLMUL(VT));
1296 }
1297 
1298 // Attempt to decompose a subvector insert/extract between VecVT and
1299 // SubVecVT via subregister indices. Returns the subregister index that
1300 // can perform the subvector insert/extract with the given element index, as
1301 // well as the index corresponding to any leftover subvectors that must be
1302 // further inserted/extracted within the register class for SubVecVT.
1303 std::pair<unsigned, unsigned>
1305  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1306  const RISCVRegisterInfo *TRI) {
1307  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1308  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1309  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1310  "Register classes not ordered");
1311  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1312  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1313  // Try to compose a subregister index that takes us from the incoming
1314  // LMUL>1 register class down to the outgoing one. At each step we half
1315  // the LMUL:
1316  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1317  // Note that this is not guaranteed to find a subregister index, such as
1318  // when we are extracting from one VR type to another.
1319  unsigned SubRegIdx = RISCV::NoSubRegister;
1320  for (const unsigned RCID :
1321  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1322  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1323  VecVT = VecVT.getHalfNumVectorElementsVT();
1324  bool IsHi =
1325  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1326  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1327  getSubregIndexByMVT(VecVT, IsHi));
1328  if (IsHi)
1329  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1330  }
1331  return {SubRegIdx, InsertExtractIdx};
1332 }
1333 
1334 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1335 // stores for those types.
1336 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1337  return !Subtarget.useRVVForFixedLengthVectors() ||
1339 }
1340 
1342  if (ScalarTy->isPointerTy())
1343  return true;
1344 
1345  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1346  ScalarTy->isIntegerTy(32) || ScalarTy->isIntegerTy(64))
1347  return true;
1348 
1349  if (ScalarTy->isHalfTy())
1350  return Subtarget.hasStdExtZfh();
1351  if (ScalarTy->isFloatTy())
1352  return Subtarget.hasStdExtF();
1353  if (ScalarTy->isDoubleTy())
1354  return Subtarget.hasStdExtD();
1355 
1356  return false;
1357 }
1358 
1359 static bool useRVVForFixedLengthVectorVT(MVT VT,
1360  const RISCVSubtarget &Subtarget) {
1361  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1362  if (!Subtarget.useRVVForFixedLengthVectors())
1363  return false;
1364 
1365  // We only support a set of vector types with a consistent maximum fixed size
1366  // across all supported vector element types to avoid legalization issues.
1367  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1368  // fixed-length vector type we support is 1024 bytes.
1369  if (VT.getFixedSizeInBits() > 1024 * 8)
1370  return false;
1371 
1372  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1373 
1374  MVT EltVT = VT.getVectorElementType();
1375 
1376  // Don't use RVV for vectors we cannot scalarize if required.
1377  switch (EltVT.SimpleTy) {
1378  // i1 is supported but has different rules.
1379  default:
1380  return false;
1381  case MVT::i1:
1382  // Masks can only use a single register.
1383  if (VT.getVectorNumElements() > MinVLen)
1384  return false;
1385  MinVLen /= 8;
1386  break;
1387  case MVT::i8:
1388  case MVT::i16:
1389  case MVT::i32:
1390  case MVT::i64:
1391  break;
1392  case MVT::f16:
1393  if (!Subtarget.hasStdExtZfh())
1394  return false;
1395  break;
1396  case MVT::f32:
1397  if (!Subtarget.hasStdExtF())
1398  return false;
1399  break;
1400  case MVT::f64:
1401  if (!Subtarget.hasStdExtD())
1402  return false;
1403  break;
1404  }
1405 
1406  // Reject elements larger than ELEN.
1407  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1408  return false;
1409 
1410  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1411  // Don't use RVV for types that don't fit.
1412  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1413  return false;
1414 
1415  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1416  // the base fixed length RVV support in place.
1417  if (!VT.isPow2VectorType())
1418  return false;
1419 
1420  return true;
1421 }
1422 
1423 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1424  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1425 }
1426 
1427 // Return the largest legal scalable vector type that matches VT's element type.
1429  const RISCVSubtarget &Subtarget) {
1430  // This may be called before legal types are setup.
1431  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1432  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1433  "Expected legal fixed length vector!");
1434 
1435  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1436  unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1437 
1438  MVT EltVT = VT.getVectorElementType();
1439  switch (EltVT.SimpleTy) {
1440  default:
1441  llvm_unreachable("unexpected element type for RVV container");
1442  case MVT::i1:
1443  case MVT::i8:
1444  case MVT::i16:
1445  case MVT::i32:
1446  case MVT::i64:
1447  case MVT::f16:
1448  case MVT::f32:
1449  case MVT::f64: {
1450  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1451  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1452  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
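    // For instance (assuming a minimum RVV vector length of 128 bits): a
    // fixed v8i32 is 256 bits, so NumElts = (8 * 64) / 128 = 4 and the
    // container is nxv4i32 (LMUL=2 in getLMUL's terms), while a small v2i32
    // maps to a fractional-LMUL container such as nxv1i32.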
1453  unsigned NumElts =
1454  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1455  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1456  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1457  return MVT::getScalableVectorVT(EltVT, NumElts);
1458  }
1459  }
1460 }
1461 
1463  const RISCVSubtarget &Subtarget) {
1465  Subtarget);
1466 }
1467 
1470 }
1471 
1472 // Grow V to consume an entire RVV register.
1474  const RISCVSubtarget &Subtarget) {
1475  assert(VT.isScalableVector() &&
1476  "Expected to convert into a scalable vector!");
1478  "Expected a fixed length vector operand!");
1479  SDLoc DL(V);
1480  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1481  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1482 }
1483 
1484 // Shrink V so it's just big enough to maintain a VT's worth of data.
1486  const RISCVSubtarget &Subtarget) {
1487  assert(VT.isFixedLengthVector() &&
1488  "Expected to convert into a fixed length vector!");
1490  "Expected a scalable vector operand!");
1491  SDLoc DL(V);
1492  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1493  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1494 }
1495 
1496 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1497 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1498 // the vector type that it is contained in.
1499 static std::pair<SDValue, SDValue>
1500 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1501  const RISCVSubtarget &Subtarget) {
1502  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1503  MVT XLenVT = Subtarget.getXLenVT();
1504  SDValue VL = VecVT.isFixedLengthVector()
1505  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1506  : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1507  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1508  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1509  return {Mask, VL};
1510 }
1511 
1512 // As above but assuming the given type is a scalable vector type.
1513 static std::pair<SDValue, SDValue>
1515  const RISCVSubtarget &Subtarget) {
1516  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1517  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1518 }
1519 
1520 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1521 // of either is (currently) supported. This can get us into an infinite loop
1522 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1523 // as a ..., etc.
1524 // Until either (or both) of these can reliably lower any node, reporting that
1525 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1526 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1527 // which is not desirable.
1529  EVT VT, unsigned DefinedValues) const {
1530  return false;
1531 }
1532 
1534  // Only splats are currently supported.
1535  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1536  return true;
1537 
1538  return false;
1539 }
1540 
1542  // RISCV FP-to-int conversions saturate to the destination register size, but
1543  // don't produce 0 for nan. We can use a conversion instruction and fix the
1544  // nan case with a compare and a select.
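// For example (sketch): an fptosi_sat from f32 to i32 is lowered to an
// FCVT_X_RTZ node (fcvt.w.s with round-towards-zero), followed by an
// unordered self-compare of the source and a select that substitutes 0 when
// the source is NaN, since the bare conversion would otherwise yield the
// maximum value for NaN inputs.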
1545  SDValue Src = Op.getOperand(0);
1546 
1547  EVT DstVT = Op.getValueType();
1548  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1549 
1550  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1551  unsigned Opc;
1552  if (SatVT == DstVT)
1553  Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
1554  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1556  else
1557  return SDValue();
1558  // FIXME: Support other SatVTs by clamping before or after the conversion.
1559 
1560  SDLoc DL(Op);
1561  SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
1562 
1563  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1564  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1565 }
1566 
1568  const RISCVSubtarget &Subtarget) {
1569  MVT VT = Op.getSimpleValueType();
1570  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1571 
1572  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1573 
1574  SDLoc DL(Op);
1575  SDValue Mask, VL;
1576  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1577 
1578  unsigned Opc =
1580  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1581  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1582 }
1583 
1584 struct VIDSequence {
1585  int64_t StepNumerator;
1587  int64_t Addend;
1588 };
1589 
1590 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1591 // to the (non-zero) step S and start value X. This can be then lowered as the
1592 // RVV sequence (VID * S) + X, for example.
1593 // The step S is represented as an integer numerator divided by a positive
1594 // denominator. Note that the implementation currently only identifies
1595 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1596 // cannot detect 2/3, for example.
1597 // Note that this method will also match potentially unappealing index
1598 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
1599 // determine whether this is worth generating code for.
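// For example (illustrative): <0, 2, 4, 6> matches with StepNumerator = 2,
// StepDenominator = 1, Addend = 0, while <1, 1, 2, 2> matches with
// StepNumerator = 1, StepDenominator = 2, Addend = 1, i.e. the sequence
// (VID * 1) / 2 + 1 under integer division.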
1601  unsigned NumElts = Op.getNumOperands();
1602  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1603  if (!Op.getValueType().isInteger())
1604  return None;
1605 
1606  Optional<unsigned> SeqStepDenom;
1607  Optional<int64_t> SeqStepNum, SeqAddend;
1609  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1610  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1611  // Assume undef elements match the sequence; we just have to be careful
1612  // when interpolating across them.
1613  if (Op.getOperand(Idx).isUndef())
1614  continue;
1615  // The BUILD_VECTOR must be all constants.
1616  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1617  return None;
1618 
1619  uint64_t Val = Op.getConstantOperandVal(Idx) &
1620  maskTrailingOnes<uint64_t>(EltSizeInBits);
1621 
1622  if (PrevElt) {
1623  // Calculate the step since the last non-undef element, and ensure
1624  // it's consistent across the entire sequence.
1625  unsigned IdxDiff = Idx - PrevElt->second;
1626  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1627 
1628  // A zero value difference means that we're somewhere in the middle
1629  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1630  // step change before evaluating the sequence.
1631  if (ValDiff != 0) {
1632  int64_t Remainder = ValDiff % IdxDiff;
1633  // Normalize the step if it's greater than 1.
1634  if (Remainder != ValDiff) {
1635  // The difference must cleanly divide the element span.
1636  if (Remainder != 0)
1637  return None;
1638  ValDiff /= IdxDiff;
1639  IdxDiff = 1;
1640  }
1641 
1642  if (!SeqStepNum)
1643  SeqStepNum = ValDiff;
1644  else if (ValDiff != SeqStepNum)
1645  return None;
1646 
1647  if (!SeqStepDenom)
1648  SeqStepDenom = IdxDiff;
1649  else if (IdxDiff != *SeqStepDenom)
1650  return None;
1651  }
1652  }
1653 
1654  // Record and/or check any addend.
1655  if (SeqStepNum && SeqStepDenom) {
1656  uint64_t ExpectedVal =
1657  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1658  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1659  if (!SeqAddend)
1660  SeqAddend = Addend;
1661  else if (SeqAddend != Addend)
1662  return None;
1663  }
1664 
1665  // Record this non-undef element for later.
1666  if (!PrevElt || PrevElt->first != Val)
1667  PrevElt = std::make_pair(Val, Idx);
1668  }
1669  // We need to have logged both a step and an addend for this to count as
1670  // a legal index sequence.
1671  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1672  return None;
1673 
1674  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1675 }
1676 
1678  const RISCVSubtarget &Subtarget) {
1679  MVT VT = Op.getSimpleValueType();
1680  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1681 
1682  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1683 
1684  SDLoc DL(Op);
1685  SDValue Mask, VL;
1686  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1687 
1688  MVT XLenVT = Subtarget.getXLenVT();
1689  unsigned NumElts = Op.getNumOperands();
1690 
1691  if (VT.getVectorElementType() == MVT::i1) {
1692  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1693  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1694  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1695  }
1696 
1697  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1698  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1699  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1700  }
1701 
1702  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1703  // scalar integer chunks whose bit-width depends on the number of mask
1704  // bits and XLEN.
1705  // First, determine the most appropriate scalar integer type to use. This
1706  // is at most XLenVT, but may be shrunk to a smaller vector element type
1707  // according to the size of the final vector - use i8 chunks rather than
1708  // XLenVT if we're producing a v8i1. This results in more consistent
1709  // codegen across RV32 and RV64.
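    // For example (illustrative): the constant v8i1 mask
    // <1, 0, 1, 1, 0, 0, 0, 1> packs element 0 into bit 0 and so becomes the
    // single i8 constant 0b10001101 held in a v1i8 vector, which is then
    // bitcast back to v8i1.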
1710  unsigned NumViaIntegerBits =
1711  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1712  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1713  // If we have to use more than one INSERT_VECTOR_ELT then this
1714  // optimization is likely to increase code size; avoid performing it in
1715  // such a case. We can use a load from a constant pool in this case.
1716  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1717  return SDValue();
1718  // Now we can create our integer vector type. Note that it may be larger
1719  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1720  MVT IntegerViaVecVT =
1721  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1722  divideCeil(NumElts, NumViaIntegerBits));
1723 
1724  uint64_t Bits = 0;
1725  unsigned BitPos = 0, IntegerEltIdx = 0;
1726  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1727 
1728  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1729  // Once we accumulate enough bits to fill our scalar type, insert into
1730  // our vector and clear our accumulated data.
1731  if (I != 0 && I % NumViaIntegerBits == 0) {
1732  if (NumViaIntegerBits <= 32)
1733  Bits = SignExtend64(Bits, 32);
1734  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1735  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1736  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1737  Bits = 0;
1738  BitPos = 0;
1739  IntegerEltIdx++;
1740  }
1741  SDValue V = Op.getOperand(I);
1742  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1743  Bits |= ((uint64_t)BitValue << BitPos);
1744  }
1745 
1746  // Insert the (remaining) scalar value into position in our integer
1747  // vector type.
1748  if (NumViaIntegerBits <= 32)
1749  Bits = SignExtend64(Bits, 32);
1750  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1751  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1752  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1753 
1754  if (NumElts < NumViaIntegerBits) {
1755  // If we're producing a smaller vector than our minimum legal integer
1756  // type, bitcast to the equivalent (known-legal) mask type, and extract
1757  // our final mask.
1758  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1759  Vec = DAG.getBitcast(MVT::v8i1, Vec);
1760  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1761  DAG.getConstant(0, DL, XLenVT));
1762  } else {
1763  // Else we must have produced an integer type with the same size as the
1764  // mask type; bitcast for the final result.
1765  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1766  Vec = DAG.getBitcast(VT, Vec);
1767  }
1768 
1769  return Vec;
1770  }
1771 
1772  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1773  // vector type, we have a legal equivalently-sized i8 type, so we can use
1774  // that.
1775  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1776  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1777 
1778  SDValue WideVec;
1779  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1780  // For a splat, perform a scalar truncate before creating the wider
1781  // vector.
1782  assert(Splat.getValueType() == XLenVT &&
1783  "Unexpected type for i1 splat value");
1784  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1785  DAG.getConstant(1, DL, XLenVT));
1786  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1787  } else {
1788  SmallVector<SDValue, 8> Ops(Op->op_values());
1789  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1790  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1791  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1792  }
1793 
1794  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1795  }
1796 
1797  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1798  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1799  : RISCVISD::VMV_V_X_VL;
1800  Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1801  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1802  }
1803 
1804  // Try and match index sequences, which we can lower to the vid instruction
1805  // with optional modifications. An all-undef vector is matched by
1806  // getSplatValue, above.
1807  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
1808  int64_t StepNumerator = SimpleVID->StepNumerator;
1809  unsigned StepDenominator = SimpleVID->StepDenominator;
1810  int64_t Addend = SimpleVID->Addend;
1811  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
1812  // threshold since it's the immediate value many RVV instructions accept.
1813  if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
1814  isInt<5>(Addend)) {
1815  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1816  // Convert right out of the scalable type so we can use standard ISD
1817  // nodes for the rest of the computation. If we used scalable types with
1818  // these, we'd lose the fixed-length vector info and generate worse
1819  // vsetvli code.
1820  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
1821  assert(StepNumerator != 0 && "Invalid step");
1822  bool Negate = false;
1823  if (StepNumerator != 1) {
1824  int64_t SplatStepVal = StepNumerator;
1825  unsigned Opcode = ISD::MUL;
1826  if (isPowerOf2_64(std::abs(StepNumerator))) {
1827  Negate = StepNumerator < 0;
1828  Opcode = ISD::SHL;
1829  SplatStepVal = Log2_64(std::abs(StepNumerator));
1830  }
1831  SDValue SplatStep = DAG.getSplatVector(
1832  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
1833  VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
1834  }
1835  if (StepDenominator != 1) {
1836  SDValue SplatStep = DAG.getSplatVector(
1837  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
1838  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
1839  }
1840  if (Addend != 0 || Negate) {
1841  SDValue SplatAddend =
1842  DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
1843  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
1844  }
1845  return VID;
1846  }
1847  }
1848 
1849  // Attempt to detect "hidden" splats, which only reveal themselves as splats
1850  // when re-interpreted as a vector with a larger element type. For example,
1851  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1852  // could be instead splat as
1853  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
1854  // TODO: This optimization could also work on non-constant splats, but it
1855  // would require bit-manipulation instructions to construct the splat value.
1857  unsigned EltBitSize = VT.getScalarSizeInBits();
1858  const auto *BV = cast<BuildVectorSDNode>(Op);
1859  if (VT.isInteger() && EltBitSize < 64 &&
1861  BV->getRepeatedSequence(Sequence) &&
1862  (Sequence.size() * EltBitSize) <= 64) {
1863  unsigned SeqLen = Sequence.size();
1864  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1865  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1866  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1867  ViaIntVT == MVT::i64) &&
1868  "Unexpected sequence type");
1869 
1870  unsigned EltIdx = 0;
1871  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1872  uint64_t SplatValue = 0;
1873  // Construct the amalgamated value which can be splatted as this larger
1874  // vector type.
1875  for (const auto &SeqV : Sequence) {
1876  if (!SeqV.isUndef())
1877  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1878  << (EltIdx * EltBitSize));
1879  EltIdx++;
1880  }
1881 
1882  // On RV64, sign-extend from 32 to 64 bits where possible in order to
1883  // achieve better constant materialization.
1884  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1885  SplatValue = SignExtend64(SplatValue, 32);
1886 
1887  // Since we can't introduce illegal i64 types at this stage, we can only
1888  // perform an i64 splat on RV32 if it is its own sign-extended value. That
1889  // way we can use RVV instructions to splat.
1890  assert((ViaIntVT.bitsLE(XLenVT) ||
1891  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1892  "Unexpected bitcast sequence");
1893  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1894  SDValue ViaVL =
1895  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1896  MVT ViaContainerVT =
1897  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1898  SDValue Splat =
1899  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1900  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1901  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1902  return DAG.getBitcast(VT, Splat);
1903  }
1904  }
1905 
1906  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1907  // which constitute a large proportion of the elements. In such cases we can
1908  // splat a vector with the dominant element and make up the shortfall with
1909  // INSERT_VECTOR_ELTs.
1910  // Note that this includes vectors of 2 elements by association. The
1911  // upper-most element is the "dominant" one, allowing us to use a splat to
1912  // "insert" the upper element, and an insert of the lower element at position
1913  // 0, which improves codegen.
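  // For example (illustrative): <i32 7, i32 7, i32 7, i32 3> would be lowered
  // as a splat of 7 followed by a single INSERT_VECTOR_ELT of 3 at index 3,
  // rather than four separate element insertions.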
1914  SDValue DominantValue;
1915  unsigned MostCommonCount = 0;
1916  DenseMap<SDValue, unsigned> ValueCounts;
1917  unsigned NumUndefElts =
1918  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1919 
1920  // Track the number of scalar loads we know we'd be inserting, estimated as
1921  // any non-zero floating-point constant. Other kinds of element are either
1922  // already in registers or are materialized on demand. The threshold at which
1923  // a vector load is more desirable than several scalar materialization and
1924  // vector-insertion instructions is not known.
1925  unsigned NumScalarLoads = 0;
1926 
1927  for (SDValue V : Op->op_values()) {
1928  if (V.isUndef())
1929  continue;
1930 
1931  ValueCounts.insert(std::make_pair(V, 0));
1932  unsigned &Count = ValueCounts[V];
1933 
1934  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
1935  NumScalarLoads += !CFP->isExactlyValue(+0.0);
1936 
1937  // Is this value dominant? In case of a tie, prefer the highest element as
1938  // it's cheaper to insert near the beginning of a vector than it is at the
1939  // end.
1940  if (++Count >= MostCommonCount) {
1941  DominantValue = V;
1942  MostCommonCount = Count;
1943  }
1944  }
1945 
1946  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1947  unsigned NumDefElts = NumElts - NumUndefElts;
1948  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1949 
1950  // Don't perform this optimization when optimizing for size, since
1951  // materializing elements and inserting them tends to cause code bloat.
1952  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
1953  ((MostCommonCount > DominantValueCountThreshold) ||
1954  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1955  // Start by splatting the most common element.
1956  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1957 
1958  DenseSet<SDValue> Processed{DominantValue};
1959  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1960  for (const auto &OpIdx : enumerate(Op->ops())) {
1961  const SDValue &V = OpIdx.value();
1962  if (V.isUndef() || !Processed.insert(V).second)
1963  continue;
1964  if (ValueCounts[V] == 1) {
1965  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1966  DAG.getConstant(OpIdx.index(), DL, XLenVT));
1967  } else {
1968  // Blend in all instances of this value using a VSELECT, using a
1969  // mask where each bit signals whether that element is the one
1970  // we're after.
1971  SmallVector<SDValue> Ops;
1972  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1973  return DAG.getConstant(V == V1, DL, XLenVT);
1974  });
1975  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1976  DAG.getBuildVector(SelMaskTy, DL, Ops),
1977  DAG.getSplatBuildVector(VT, DL, V), Vec);
1978  }
1979  }
1980 
1981  return Vec;
1982  }
1983 
1984  return SDValue();
1985 }
1986 
1988  SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1989  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1990  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1991  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1992  // If Hi constant is all the same sign bit as Lo, lower this as a custom
1993  // node in order to try and match RVV vector/scalar instructions.
1994  if ((LoC >> 31) == HiC)
1995  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1996  }
1997 
1998  // Fall back to a stack store and stride x0 vector load.
1999  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
2000 }
2001 
2002 // Called by type legalization to handle splat of i64 on RV32.
2003 // FIXME: We can optimize this when the type has sign or zero bits in one
2004 // of the halves.
2005 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2006  SDValue VL, SelectionDAG &DAG) {
2007  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2008  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2009  DAG.getConstant(0, DL, MVT::i32));
2010  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2011  DAG.getConstant(1, DL, MVT::i32));
2012  return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2013 }
2014 
2015 // This function lowers a splat of a scalar operand Splat with the vector
2016 // length VL. It ensures the final sequence is type legal, which is useful when
2017 // lowering a splat after type legalization.
2019  SelectionDAG &DAG,
2020  const RISCVSubtarget &Subtarget) {
2021  if (VT.isFloatingPoint())
2022  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
2023 
2024  MVT XLenVT = Subtarget.getXLenVT();
2025 
2026  // Simplest case is that the operand needs to be promoted to XLenVT.
2027  if (Scalar.getValueType().bitsLE(XLenVT)) {
2028  // If the operand is a constant, sign extend to increase our chances
2029  // of being able to use a .vi instruction. ANY_EXTEND would become a
2030  // zero extend and the simm5 check in isel would fail.
2031  // FIXME: Should we ignore the upper bits in isel instead?
2032  unsigned ExtOpc =
2033  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2034  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2035  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
2036  }
2037 
2038  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2039  "Unexpected scalar for splat lowering!");
2040 
2041  // Otherwise use the more complicated splatting algorithm.
2042  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2043 }
2044 
2046  const RISCVSubtarget &Subtarget) {
2047  SDValue V1 = Op.getOperand(0);
2048  SDValue V2 = Op.getOperand(1);
2049  SDLoc DL(Op);
2050  MVT XLenVT = Subtarget.getXLenVT();
2051  MVT VT = Op.getSimpleValueType();
2052  unsigned NumElts = VT.getVectorNumElements();
2053  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2054 
2055  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2056 
2057  SDValue TrueMask, VL;
2058  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2059 
2060  if (SVN->isSplat()) {
2061  const int Lane = SVN->getSplatIndex();
2062  if (Lane >= 0) {
2063  MVT SVT = VT.getVectorElementType();
2064 
2065  // Turn splatted vector load into a strided load with an X0 stride.
2066  SDValue V = V1;
2067  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2068  // with undef.
2069  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2070  int Offset = Lane;
2071  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2072  int OpElements =
2073  V.getOperand(0).getSimpleValueType().getVectorNumElements();
2074  V = V.getOperand(Offset / OpElements);
2075  Offset %= OpElements;
2076  }
2077 
2078  // We need to ensure the load isn't atomic or volatile.
2079  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2080  auto *Ld = cast<LoadSDNode>(V);
2081  Offset *= SVT.getStoreSize();
2082  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2083  TypeSize::Fixed(Offset), DL);
2084 
2085  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2086  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2087  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2088  SDValue IntID =
2089  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2090  SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
2091  DAG.getRegister(RISCV::X0, XLenVT), VL};
2092  SDValue NewLoad = DAG.getMemIntrinsicNode(
2093  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2094  DAG.getMachineFunction().getMachineMemOperand(
2095  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2096  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2097  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2098  }
2099 
2100  // Otherwise use a scalar load and splat. This will give the best
2101  // opportunity to fold a splat into the operation. ISel can turn it into
2102  // the x0 strided load if we aren't able to fold away the select.
2103  if (SVT.isFloatingPoint())
2104  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2105  Ld->getPointerInfo().getWithOffset(Offset),
2106  Ld->getOriginalAlign(),
2107  Ld->getMemOperand()->getFlags());
2108  else
2109  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2110  Ld->getPointerInfo().getWithOffset(Offset), SVT,
2111  Ld->getOriginalAlign(),
2112  Ld->getMemOperand()->getFlags());
2113  DAG.makeEquivalentMemoryOrdering(Ld, V);
2114 
2115  unsigned Opc =
2116  SVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2117  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2118  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2119  }
2120 
2121  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2122  assert(Lane < (int)NumElts && "Unexpected lane!");
2123  SDValue Gather =
2124  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2125  DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2126  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2127  }
2128  }
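// Note (editor annotation, not part of the upstream source): for a splat
// shuffle such as shufflevector <4 x float> %v, undef, <2, 2, 2, 2>, the
// block above either folds a splatted loaded element into a scalar load
// followed by a vector splat (or an x0-strided vlse for i64 elements on
// RV32), or falls back to a vrgather.vx-style gather with the lane index 2
// as the scalar operand.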
2129 
2130  // Detect shuffles which can be re-expressed as vector selects; these are
2131  // shuffles in which each element in the destination is taken from an element
2132  // at the corresponding index in either source vector.
2133  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
2134  int MaskIndex = MaskIdx.value();
2135  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2136  });
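// Note (editor annotation, not part of the upstream source): a "select-like"
// mask keeps every lane in place and only chooses which source it comes
// from. With NumElts == 4, the mask <0, 5, 2, 7> passes the check above
// (5 % 4 == 1 at index 1, 7 % 4 == 3 at index 3) and lowers to a single
// vselect below, while <1, 0, 2, 3> moves lane 0 and needs the vrgather
// path instead.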
2137 
2138  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2139 
2140  SmallVector<SDValue> MaskVals;
2141  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2142  // merged with a second vrgather.
2143  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2144 
2145  // By default we preserve the original operand order, and use a mask to
2146  // select LHS as true and RHS as false. However, since RVV vector selects may
2147  // feature splats but only on the LHS, we may choose to invert our mask and
2148  // instead select between RHS and LHS.
2149  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2150  bool InvertMask = IsSelect == SwapOps;
2151 
2152  // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
2153  // half.
2154  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2155 
2156  // Now construct the mask that will be used by the vselect or blended
2157  // vrgather operation. For vrgathers, construct the appropriate indices into
2158  // each vector.
2159  for (int MaskIndex : SVN->getMask()) {
2160  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2161  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2162  if (!IsSelect) {
2163  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2164  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2165  ? DAG.getConstant(MaskIndex, DL, XLenVT)
2166  : DAG.getUNDEF(XLenVT));
2167  GatherIndicesRHS.push_back(
2168  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2169  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2170  if (IsLHSOrUndefIndex && MaskIndex >= 0)
2171  ++LHSIndexCounts[MaskIndex];
2172  if (!IsLHSOrUndefIndex)
2173  ++RHSIndexCounts[MaskIndex - NumElts];
2174  }
2175  }
2176 
2177  if (SwapOps) {
2178  std::swap(V1, V2);
2179  std::swap(GatherIndicesLHS, GatherIndicesRHS);
2180  }
2181 
2182  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2183  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2184  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2185 
2186  if (IsSelect)
2187  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2188 
2189  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2190  // On such a large vector we're unable to use i8 as the index type.
2191  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2192  // may involve vector splitting if we're already at LMUL=8, or our
2193  // user-supplied maximum fixed-length LMUL.
2194  return SDValue();
2195  }
2196 
2197  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2198  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2199  MVT IndexVT = VT.changeTypeToInteger();
2200  // Since we can't introduce illegal index types at this stage, use i16 and
2201  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2202  // than XLenVT.
2203  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2204  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2205  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2206  }
2207 
2208  MVT IndexContainerVT =
2209  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2210 
2211  SDValue Gather;
2212  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2213  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2214  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2215  Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2216  } else {
2217  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2218  // If only one index is used, we can use a "splat" vrgather.
2219  // TODO: We can splat the most-common index and fix-up any stragglers, if
2220  // that's beneficial.
2221  if (LHSIndexCounts.size() == 1) {
2222  int SplatIndex = LHSIndexCounts.begin()->getFirst();
2223  Gather =
2224  DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2225  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2226  } else {
2227  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2228  LHSIndices =
2229  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2230 
2231  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2232  TrueMask, VL);
2233  }
2234  }
2235 
2236  // If a second vector operand is used by this shuffle, blend it in with an
2237  // additional vrgather.
2238  if (!V2.isUndef()) {
2239  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2240  // If only one index is used, we can use a "splat" vrgather.
2241  // TODO: We can splat the most-common index and fix-up any stragglers, if
2242  // that's beneficial.
2243  if (RHSIndexCounts.size() == 1) {
2244  int SplatIndex = RHSIndexCounts.begin()->getFirst();
2245  V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2246  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2247  } else {
2248  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2249  RHSIndices =
2250  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2251  V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2252  VL);
2253  }
2254 
2255  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2256  SelectMask =
2257  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2258 
2259  Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2260  Gather, VL);
2261  }
2262 
2263  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2264 }
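// Note (editor annotation, not part of the upstream source): the general
// two-source case above is roughly "gather from V1, gather from V2, then
// blend the two results under the select mask", with single-index gathers
// degraded to the cheaper vrgather.vx form, and i8 vectors longer than 256
// elements given up on entirely (per the early return above).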
2265 
2266 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2267  SDLoc DL, SelectionDAG &DAG,
2268  const RISCVSubtarget &Subtarget) {
2269  if (VT.isScalableVector())
2270  return DAG.getFPExtendOrRound(Op, DL, VT);
2271  assert(VT.isFixedLengthVector() &&
2272  "Unexpected value type for RVV FP extend/round lowering");
2273  SDValue Mask, VL;
2274  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2275  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2276  ? RISCVISD::FP_EXTEND_VL
2277  : RISCVISD::FP_ROUND_VL;
2278  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2279 }
2280 
2281 // While RVV has alignment restrictions, we should always be able to load as a
2282 // legal equivalently-sized byte-typed vector instead. This method is
2283  // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2284 // the load is already correctly-aligned, it returns SDValue().
2285 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2286  SelectionDAG &DAG) const {
2287  auto *Load = cast<LoadSDNode>(Op);
2288  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2289 
2290  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2291  Load->getMemoryVT(),
2292  *Load->getMemOperand()))
2293  return SDValue();
2294 
2295  SDLoc DL(Op);
2296  MVT VT = Op.getSimpleValueType();
2297  unsigned EltSizeBits = VT.getScalarSizeInBits();
2298  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2299  "Unexpected unaligned RVV load type");
2300  MVT NewVT =
2301  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2302  assert(NewVT.isValid() &&
2303  "Expecting equally-sized RVV vector types to be legal");
2304  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2305  Load->getPointerInfo(), Load->getOriginalAlign(),
2306  Load->getMemOperand()->getFlags());
2307  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2308 }
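// Note (editor annotation, not part of the upstream source): e.g. a
// <4 x i32> load with alignment 1 is re-issued here as a <16 x i8> load of
// the same number of bytes and bitcast back, since byte-element accesses
// carry no alignment restriction.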
2309 
2310 // While RVV has alignment restrictions, we should always be able to store as a
2311 // legal equivalently-sized byte-typed vector instead. This method is
2312  // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2313 // returns SDValue() if the store is already correctly aligned.
2314 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2315  SelectionDAG &DAG) const {
2316  auto *Store = cast<StoreSDNode>(Op);
2317  assert(Store && Store->getValue().getValueType().isVector() &&
2318  "Expected vector store");
2319 
2320  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2321  Store->getMemoryVT(),
2322  *Store->getMemOperand()))
2323  return SDValue();
2324 
2325  SDLoc DL(Op);
2326  SDValue StoredVal = Store->getValue();
2327  MVT VT = StoredVal.getSimpleValueType();
2328  unsigned EltSizeBits = VT.getScalarSizeInBits();
2329  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2330  "Unexpected unaligned RVV store type");
2331  MVT NewVT =
2332  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2333  assert(NewVT.isValid() &&
2334  "Expecting equally-sized RVV vector types to be legal");
2335  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2336  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2337  Store->getPointerInfo(), Store->getOriginalAlign(),
2338  Store->getMemOperand()->getFlags());
2339 }
2340 
2341 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2342  SelectionDAG &DAG) const {
2343  switch (Op.getOpcode()) {
2344  default:
2345  report_fatal_error("unimplemented operand");
2346  case ISD::GlobalAddress:
2347  return lowerGlobalAddress(Op, DAG);
2348  case ISD::BlockAddress:
2349  return lowerBlockAddress(Op, DAG);
2350  case ISD::ConstantPool:
2351  return lowerConstantPool(Op, DAG);
2352  case ISD::JumpTable:
2353  return lowerJumpTable(Op, DAG);
2354  case ISD::GlobalTLSAddress:
2355  return lowerGlobalTLSAddress(Op, DAG);
2356  case ISD::SELECT:
2357  return lowerSELECT(Op, DAG);
2358  case ISD::BRCOND:
2359  return lowerBRCOND(Op, DAG);
2360  case ISD::VASTART:
2361  return lowerVASTART(Op, DAG);
2362  case ISD::FRAMEADDR:
2363  return lowerFRAMEADDR(Op, DAG);
2364  case ISD::RETURNADDR:
2365  return lowerRETURNADDR(Op, DAG);
2366  case ISD::SHL_PARTS:
2367  return lowerShiftLeftParts(Op, DAG);
2368  case ISD::SRA_PARTS:
2369  return lowerShiftRightParts(Op, DAG, true);
2370  case ISD::SRL_PARTS:
2371  return lowerShiftRightParts(Op, DAG, false);
2372  case ISD::BITCAST: {
2373  SDLoc DL(Op);
2374  EVT VT = Op.getValueType();
2375  SDValue Op0 = Op.getOperand(0);
2376  EVT Op0VT = Op0.getValueType();
2377  MVT XLenVT = Subtarget.getXLenVT();
2378  if (VT.isFixedLengthVector()) {
2379  // We can handle fixed length vector bitcasts with a simple replacement
2380  // in isel.
2381  if (Op0VT.isFixedLengthVector())
2382  return Op;
2383  // When bitcasting from scalar to fixed-length vector, insert the scalar
2384  // into a one-element vector of the result type, and perform a vector
2385  // bitcast.
2386  if (!Op0VT.isVector()) {
2387  auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2388  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2389  DAG.getUNDEF(BVT), Op0,
2390  DAG.getConstant(0, DL, XLenVT)));
2391  }
2392  return SDValue();
2393  }
2394  // Custom-legalize bitcasts from fixed-length vector types to scalar types
2395  // thus: bitcast the vector to a one-element vector type whose element type
2396  // is the same as the result type, and extract the first element.
2397  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2398  LLVMContext &Context = *DAG.getContext();
2399  SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
2400  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2401  DAG.getConstant(0, DL, XLenVT));
2402  }
2403  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2404  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2405  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2406  return FPConv;
2407  }
2408  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2409  Subtarget.hasStdExtF()) {
2410  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2411  SDValue FPConv =
2412  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2413  return FPConv;
2414  }
2415  return SDValue();
2416  }
2417  case ISD::INTRINSIC_WO_CHAIN:
2418  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2419  case ISD::INTRINSIC_W_CHAIN:
2420  return LowerINTRINSIC_W_CHAIN(Op, DAG);
2421  case ISD::INTRINSIC_VOID:
2422  return LowerINTRINSIC_VOID(Op, DAG);
2423  case ISD::BSWAP:
2424  case ISD::BITREVERSE: {
2425  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2426  assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2427  MVT VT = Op.getSimpleValueType();
2428  SDLoc DL(Op);
2429  // Start with the maximum immediate value which is the bitwidth - 1.
2430  unsigned Imm = VT.getSizeInBits() - 1;
2431  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2432  if (Op.getOpcode() == ISD::BSWAP)
2433  Imm &= ~0x7U;
2434  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2435  DAG.getConstant(Imm, DL, VT));
2436  }
2437  case ISD::FSHL:
2438  case ISD::FSHR: {
2439  MVT VT = Op.getSimpleValueType();
2440  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2441  SDLoc DL(Op);
2442  if (Op.getOperand(2).getOpcode() == ISD::Constant)
2443  return Op;
2444  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2445  // use log2(XLen) bits. Mask the shift amount accordingly.
2446  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2447  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2448  DAG.getConstant(ShAmtWidth, DL, VT));
2449  unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
2450  return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
2451  }
2452  case ISD::TRUNCATE: {
2453  SDLoc DL(Op);
2454  MVT VT = Op.getSimpleValueType();
2455  // Only custom-lower vector truncates
2456  if (!VT.isVector())
2457  return Op;
2458 
2459  // Truncates to mask types are handled differently
2460  if (VT.getVectorElementType() == MVT::i1)
2461  return lowerVectorMaskTrunc(Op, DAG);
2462 
2463  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2464  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2465  // truncate by one power of two at a time.
2466  MVT DstEltVT = VT.getVectorElementType();
2467 
2468  SDValue Src = Op.getOperand(0);
2469  MVT SrcVT = Src.getSimpleValueType();
2470  MVT SrcEltVT = SrcVT.getVectorElementType();
2471 
2472  assert(DstEltVT.bitsLT(SrcEltVT) &&
2473  isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2474  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2475  "Unexpected vector truncate lowering");
2476 
2477  MVT ContainerVT = SrcVT;
2478  if (SrcVT.isFixedLengthVector()) {
2479  ContainerVT = getContainerForFixedLengthVector(SrcVT);
2480  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2481  }
2482 
2483  SDValue Result = Src;
2484  SDValue Mask, VL;
2485  std::tie(Mask, VL) =
2486  getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2487  LLVMContext &Context = *DAG.getContext();
2488  const ElementCount Count = ContainerVT.getVectorElementCount();
2489  do {
2490  SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2491  EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2492  Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2493  Mask, VL);
2494  } while (SrcEltVT != DstEltVT);
2495 
2496  if (SrcVT.isFixedLengthVector())
2497  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2498 
2499  return Result;
2500  }
2501  case ISD::ANY_EXTEND:
2502  case ISD::ZERO_EXTEND:
2503  if (Op.getOperand(0).getValueType().isVector() &&
2504  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2505  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2506  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2507  case ISD::SIGN_EXTEND:
2508  if (Op.getOperand(0).getValueType().isVector() &&
2509  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2510  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2511  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2512  case ISD::SPLAT_VECTOR_PARTS:
2513  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2514  case ISD::INSERT_VECTOR_ELT:
2515  return lowerINSERT_VECTOR_ELT(Op, DAG);
2516  case ISD::EXTRACT_VECTOR_ELT:
2517  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2518  case ISD::VSCALE: {
2519  MVT VT = Op.getSimpleValueType();
2520  SDLoc DL(Op);
2521  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
2522  // We define our scalable vector types for lmul=1 to use a 64 bit known
2523  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
2524  // vscale as VLENB / 8.
2525  assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2526  if (isa<ConstantSDNode>(Op.getOperand(0))) {
2527  // We assume VLENB is a multiple of 8. We manually choose the best shift
2528  // here because SimplifyDemandedBits isn't always able to simplify it.
2529  uint64_t Val = Op.getConstantOperandVal(0);
2530  if (isPowerOf2_64(Val)) {
2531  uint64_t Log2 = Log2_64(Val);
2532  if (Log2 < 3)
2533  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
2534  DAG.getConstant(3 - Log2, DL, VT));
2535  if (Log2 > 3)
2536  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
2537  DAG.getConstant(Log2 - 3, DL, VT));
2538  return VLENB;
2539  }
2540  // If the multiplier is a multiple of 8, scale it down to avoid needing
2541  // to shift the VLENB value.
2542  if ((Val % 8) == 0)
2543  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
2544  DAG.getConstant(Val / 8, DL, VT));
2545  }
2546 
2547  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2548  DAG.getConstant(3, DL, VT));
2549  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2550  }
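// Note (editor annotation, not part of the upstream source): with
// RVVBitsPerBlock == 64, vscale == VLENB / 8, so the constant multiples
// above become: vscale * 2 -> srl vlenb, 2; vscale * 8 -> vlenb itself;
// vscale * 16 -> sll vlenb, 1; vscale * 24 -> mul vlenb, 3.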
2551  case ISD::FP_EXTEND: {
2552  // RVV can only do fp_extend to types double the size of the source. We
2553  // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2554  // via f32.
2555  SDLoc DL(Op);
2556  MVT VT = Op.getSimpleValueType();
2557  SDValue Src = Op.getOperand(0);
2558  MVT SrcVT = Src.getSimpleValueType();
2559 
2560  // Prepare any fixed-length vector operands.
2561  MVT ContainerVT = VT;
2562  if (SrcVT.isFixedLengthVector()) {
2563  ContainerVT = getContainerForFixedLengthVector(VT);
2564  MVT SrcContainerVT =
2565  ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2566  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2567  }
2568 
2569  if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2570  SrcVT.getVectorElementType() != MVT::f16) {
2571  // For scalable vectors, we only need to close the gap between
2572  // vXf16->vXf64.
2573  if (!VT.isFixedLengthVector())
2574  return Op;
2575  // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2576  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2577  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2578  }
2579 
2580  MVT InterVT = VT.changeVectorElementType(MVT::f32);
2581  MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2582  SDValue IntermediateExtend = getRVVFPExtendOrRound(
2583  Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2584 
2585  SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2586  DL, DAG, Subtarget);
2587  if (VT.isFixedLengthVector())
2588  return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2589  return Extend;
2590  }
2591  case ISD::FP_ROUND: {
2592  // RVV can only do fp_round to types half the size of the source. We
2593  // custom-lower f64->f16 rounds via RVV's round-to-odd float
2594  // conversion instruction.
2595  SDLoc DL(Op);
2596  MVT VT = Op.getSimpleValueType();
2597  SDValue Src = Op.getOperand(0);
2598  MVT SrcVT = Src.getSimpleValueType();
2599 
2600  // Prepare any fixed-length vector operands.
2601  MVT ContainerVT = VT;
2602  if (VT.isFixedLengthVector()) {
2603  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2604  ContainerVT =
2605  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2606  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2607  }
2608 
2609  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2610  SrcVT.getVectorElementType() != MVT::f64) {
2611  // For scalable vectors, we only need to close the gap between
2612  // vXf64<->vXf16.
2613  if (!VT.isFixedLengthVector())
2614  return Op;
2615  // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2616  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2617  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2618  }
2619 
2620  SDValue Mask, VL;
2621  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2622 
2623  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2624  SDValue IntermediateRound =
2625  DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2626  SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2627  DL, DAG, Subtarget);
2628 
2629  if (VT.isFixedLengthVector())
2630  return convertFromScalableVector(VT, Round, DAG, Subtarget);
2631  return Round;
2632  }
2633  case ISD::FP_TO_SINT:
2634  case ISD::FP_TO_UINT:
2635  case ISD::SINT_TO_FP:
2636  case ISD::UINT_TO_FP: {
2637  // RVV can only do fp<->int conversions to types half/double the size of
2638  // the source. We custom-lower any conversions that do two hops into
2639  // sequences.
2640  MVT VT = Op.getSimpleValueType();
2641  if (!VT.isVector())
2642  return Op;
2643  SDLoc DL(Op);
2644  SDValue Src = Op.getOperand(0);
2645  MVT EltVT = VT.getVectorElementType();
2646  MVT SrcVT = Src.getSimpleValueType();
2647  MVT SrcEltVT = SrcVT.getVectorElementType();
2648  unsigned EltSize = EltVT.getSizeInBits();
2649  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2650  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2651  "Unexpected vector element types");
2652 
2653  bool IsInt2FP = SrcEltVT.isInteger();
2654  // Widening conversions
2655  if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2656  if (IsInt2FP) {
2657  // Do a regular integer sign/zero extension then convert to float.
2658  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
2659  VT.getVectorElementCount());
2660  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2661  ? ISD::ZERO_EXTEND
2662  : ISD::SIGN_EXTEND;
2663  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2664  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2665  }
2666  // FP2Int
2667  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2668  // Do one doubling fp_extend then complete the operation by converting
2669  // to int.
2670  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2671  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2672  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2673  }
2674 
2675  // Narrowing conversions
2676  if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2677  if (IsInt2FP) {
2678  // One narrowing int_to_fp, then an fp_round.
2679  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2680  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2681  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2682  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2683  }
2684  // FP2Int
2685  // One narrowing fp_to_int, then truncate the integer. If the float isn't
2686  // representable by the integer, the result is poison.
2687  MVT IVecVT =
2688  MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
2689  VT.getVectorElementCount());
2690  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2691  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2692  }
2693 
2694  // Scalable vectors can exit here. Patterns will handle equally-sized
2695  // conversions halving/doubling ones.
2696  if (!VT.isFixedLengthVector())
2697  return Op;
2698 
2699  // For fixed-length vectors we lower to a custom "VL" node.
2700  unsigned RVVOpc = 0;
2701  switch (Op.getOpcode()) {
2702  default:
2703  llvm_unreachable("Impossible opcode");
2704  case ISD::FP_TO_SINT:
2705  RVVOpc = RISCVISD::FP_TO_SINT_VL;
2706  break;
2707  case ISD::FP_TO_UINT:
2708  RVVOpc = RISCVISD::FP_TO_UINT_VL;
2709  break;
2710  case ISD::SINT_TO_FP:
2711  RVVOpc = RISCVISD::SINT_TO_FP_VL;
2712  break;
2713  case ISD::UINT_TO_FP:
2714  RVVOpc = RISCVISD::UINT_TO_FP_VL;
2715  break;
2716  }
2717 
2718  MVT ContainerVT, SrcContainerVT;
2719  // Derive the reference container type from the larger vector type.
2720  if (SrcEltSize > EltSize) {
2721  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2722  ContainerVT =
2723  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2724  } else {
2725  ContainerVT = getContainerForFixedLengthVector(VT);
2726  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2727  }
2728 
2729  SDValue Mask, VL;
2730  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2731 
2732  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2733  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2734  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2735  }
2736  case ISD::FP_TO_SINT_SAT:
2737  case ISD::FP_TO_UINT_SAT:
2738  return lowerFP_TO_INT_SAT(Op, DAG);
2739  case ISD::VECREDUCE_ADD:
2740  case ISD::VECREDUCE_UMAX:
2741  case ISD::VECREDUCE_SMAX:
2742  case ISD::VECREDUCE_UMIN:
2743  case ISD::VECREDUCE_SMIN:
2744  return lowerVECREDUCE(Op, DAG);
2745  case ISD::VECREDUCE_AND:
2746  case ISD::VECREDUCE_OR:
2747  case ISD::VECREDUCE_XOR:
2748  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2749  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
2750  return lowerVECREDUCE(Op, DAG);
2751  case ISD::VECREDUCE_FADD:
2752  case ISD::VECREDUCE_SEQ_FADD:
2753  case ISD::VECREDUCE_FMIN:
2754  case ISD::VECREDUCE_FMAX:
2755  return lowerFPVECREDUCE(Op, DAG);
2756  case ISD::VP_REDUCE_ADD:
2757  case ISD::VP_REDUCE_UMAX:
2758  case ISD::VP_REDUCE_SMAX:
2759  case ISD::VP_REDUCE_UMIN:
2760  case ISD::VP_REDUCE_SMIN:
2761  case ISD::VP_REDUCE_FADD:
2762  case ISD::VP_REDUCE_SEQ_FADD:
2763  case ISD::VP_REDUCE_FMIN:
2764  case ISD::VP_REDUCE_FMAX:
2765  return lowerVPREDUCE(Op, DAG);
2766  case ISD::VP_REDUCE_AND:
2767  case ISD::VP_REDUCE_OR:
2768  case ISD::VP_REDUCE_XOR:
2769  if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
2770  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
2771  return lowerVPREDUCE(Op, DAG);
2772  case ISD::INSERT_SUBVECTOR:
2773  return lowerINSERT_SUBVECTOR(Op, DAG);
2774  case ISD::EXTRACT_SUBVECTOR:
2775  return lowerEXTRACT_SUBVECTOR(Op, DAG);
2776  case ISD::STEP_VECTOR:
2777  return lowerSTEP_VECTOR(Op, DAG);
2778  case ISD::VECTOR_REVERSE:
2779  return lowerVECTOR_REVERSE(Op, DAG);
2780  case ISD::BUILD_VECTOR:
2781  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2782  case ISD::SPLAT_VECTOR:
2783  if (Op.getValueType().getVectorElementType() == MVT::i1)
2784  return lowerVectorMaskSplat(Op, DAG);
2785  return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2786  case ISD::VECTOR_SHUFFLE:
2787  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2788  case ISD::CONCAT_VECTORS: {
2789  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2790  // better than going through the stack, as the default expansion does.
2791  SDLoc DL(Op);
2792  MVT VT = Op.getSimpleValueType();
2793  unsigned NumOpElts =
2794  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
2795  SDValue Vec = DAG.getUNDEF(VT);
2796  for (const auto &OpIdx : enumerate(Op->ops()))
2797  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2798  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2799  return Vec;
2800  }
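// Note (editor annotation, not part of the upstream source): e.g. a
// concat_vectors of four <2 x i32> operands becomes four insert_subvector
// nodes into an undef <8 x i32> at element offsets 0, 2, 4 and 6, avoiding
// the stack round-trip of the default expansion.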
2801  case ISD::LOAD:
2802  if (auto V = expandUnalignedRVVLoad(Op, DAG))
2803  return V;
2804  if (Op.getValueType().isFixedLengthVector())
2805  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2806  return Op;
2807  case ISD::STORE:
2808  if (auto V = expandUnalignedRVVStore(Op, DAG))
2809  return V;
2810  if (Op.getOperand(1).getValueType().isFixedLengthVector())
2811  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2812  return Op;
2813  case ISD::MLOAD:
2814  case ISD::VP_LOAD:
2815  return lowerMaskedLoad(Op, DAG);
2816  case ISD::MSTORE:
2817  case ISD::VP_STORE:
2818  return lowerMaskedStore(Op, DAG);
2819  case ISD::SETCC:
2820  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2821  case ISD::ADD:
2822  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2823  case ISD::SUB:
2824  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2825  case ISD::MUL:
2826  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2827  case ISD::MULHS:
2828  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2829  case ISD::MULHU:
2830  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2831  case ISD::AND:
2832  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2833  RISCVISD::AND_VL);
2834  case ISD::OR:
2835  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2836  RISCVISD::OR_VL);
2837  case ISD::XOR:
2838  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2839  RISCVISD::XOR_VL);
2840  case ISD::SDIV:
2841  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2842  case ISD::SREM:
2843  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2844  case ISD::UDIV:
2845  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2846  case ISD::UREM:
2847  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2848  case ISD::SHL:
2849  case ISD::SRA:
2850  case ISD::SRL:
2851  if (Op.getSimpleValueType().isFixedLengthVector())
2852  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
2853  // This can be called for an i32 shift amount that needs to be promoted.
2854  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
2855  "Unexpected custom legalisation");
2856  return SDValue();
2857  case ISD::SADDSAT:
2858  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
2859  case ISD::UADDSAT:
2860  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
2861  case ISD::SSUBSAT:
2862  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
2863  case ISD::USUBSAT:
2864  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
2865  case ISD::FADD:
2866  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2867  case ISD::FSUB:
2868  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2869  case ISD::FMUL:
2870  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2871  case ISD::FDIV:
2872  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2873  case ISD::FNEG:
2874  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2875  case ISD::FABS:
2876  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2877  case ISD::FSQRT:
2878  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2879  case ISD::FMA:
2880  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2881  case ISD::SMIN:
2882  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2883  case ISD::SMAX:
2884  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2885  case ISD::UMIN:
2886  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2887  case ISD::UMAX:
2888  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2889  case ISD::FMINNUM:
2890  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2891  case ISD::FMAXNUM:
2892  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2893  case ISD::ABS:
2894  return lowerABS(Op, DAG);
2895  case ISD::VSELECT:
2896  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2897  case ISD::FCOPYSIGN:
2898  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2899  case ISD::MGATHER:
2900  case ISD::VP_GATHER:
2901  return lowerMaskedGather(Op, DAG);
2902  case ISD::MSCATTER:
2903  case ISD::VP_SCATTER:
2904  return lowerMaskedScatter(Op, DAG);
2905  case ISD::FLT_ROUNDS_:
2906  return lowerGET_ROUNDING(Op, DAG);
2907  case ISD::SET_ROUNDING:
2908  return lowerSET_ROUNDING(Op, DAG);
2909  case ISD::VP_ADD:
2910  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2911  case ISD::VP_SUB:
2912  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2913  case ISD::VP_MUL:
2914  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2915  case ISD::VP_SDIV:
2916  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2917  case ISD::VP_UDIV:
2918  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2919  case ISD::VP_SREM:
2920  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2921  case ISD::VP_UREM:
2922  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2923  case ISD::VP_AND:
2924  return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2925  case ISD::VP_OR:
2926  return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2927  case ISD::VP_XOR:
2928  return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2929  case ISD::VP_ASHR:
2930  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2931  case ISD::VP_LSHR:
2932  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2933  case ISD::VP_SHL:
2934  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2935  case ISD::VP_FADD:
2936  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
2937  case ISD::VP_FSUB:
2938  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
2939  case ISD::VP_FMUL:
2940  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
2941  case ISD::VP_FDIV:
2942  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
2943  }
2944 }
2945 
2946 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2947  SelectionDAG &DAG, unsigned Flags) {
2948  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2949 }
2950 
2951 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2952  SelectionDAG &DAG, unsigned Flags) {
2953  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2954  Flags);
2955 }
2956 
2957 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2958  SelectionDAG &DAG, unsigned Flags) {
2959  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2960  N->getOffset(), Flags);
2961 }
2962 
2963 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2964  SelectionDAG &DAG, unsigned Flags) {
2965  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2966 }
2967 
2968 template <class NodeTy>
2969 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2970  bool IsLocal) const {
2971  SDLoc DL(N);
2972  EVT Ty = getPointerTy(DAG.getDataLayout());
2973 
2974  if (isPositionIndependent()) {
2975  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2976  if (IsLocal)
2977  // Use PC-relative addressing to access the symbol. This generates the
2978  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2979  // %pcrel_lo(auipc)).
2980  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2981 
2982  // Use PC-relative addressing to access the GOT for this symbol, then load
2983  // the address from the GOT. This generates the pattern (PseudoLA sym),
2984  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2985  return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2986  }
2987 
2988  switch (getTargetMachine().getCodeModel()) {
2989  default:
2990  report_fatal_error("Unsupported code model for lowering");
2991  case CodeModel::Small: {
2992  // Generate a sequence for accessing addresses within the first 2 GiB of
2993  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
2994  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2995  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2996  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2997  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2998  }
2999  case CodeModel::Medium: {
3000  // Generate a sequence for accessing addresses within any 2GiB range within
3001  // the address space. This generates the pattern (PseudoLLA sym), which
3002  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3003  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3004  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3005  }
3006  }
3007 }
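// Note (editor annotation, not part of the upstream source): concretely, for
// a symbol foo the strategies above correspond to roughly
//   static, small code model      : lui a0, %hi(foo); addi a0, a0, %lo(foo)
//   medium code model / local PIC : auipc + addi with %pcrel_hi/%pcrel_lo
//   PIC, preemptible symbol       : auipc with %got_pcrel_hi, then a GOT load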
3008 
3009 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3010  SelectionDAG &DAG) const {
3011  SDLoc DL(Op);
3012  EVT Ty = Op.getValueType();
3013  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3014  int64_t Offset = N->getOffset();
3015  MVT XLenVT = Subtarget.getXLenVT();
3016 
3017  const GlobalValue *GV = N->getGlobal();
3018  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3019  SDValue Addr = getAddr(N, DAG, IsLocal);
3020 
3021  // In order to maximise the opportunity for common subexpression elimination,
3022  // emit a separate ADD node for the global address offset instead of folding
3023  // it in the global address node. Later peephole optimisations may choose to
3024  // fold it back in when profitable.
3025  if (Offset != 0)
3026  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3027  DAG.getConstant(Offset, DL, XLenVT));
3028  return Addr;
3029 }
3030 
3031 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3032  SelectionDAG &DAG) const {
3033  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3034 
3035  return getAddr(N, DAG);
3036 }
3037 
3038 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3039  SelectionDAG &DAG) const {
3040  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3041 
3042  return getAddr(N, DAG);
3043 }
3044 
3045 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3046  SelectionDAG &DAG) const {
3047  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3048 
3049  return getAddr(N, DAG);
3050 }
3051 
3052 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3053  SelectionDAG &DAG,
3054  bool UseGOT) const {
3055  SDLoc DL(N);
3056  EVT Ty = getPointerTy(DAG.getDataLayout());
3057  const GlobalValue *GV = N->getGlobal();
3058  MVT XLenVT = Subtarget.getXLenVT();
3059 
3060  if (UseGOT) {
3061  // Use PC-relative addressing to access the GOT for this TLS symbol, then
3062  // load the address from the GOT and add the thread pointer. This generates
3063  // the pattern (PseudoLA_TLS_IE sym), which expands to
3064  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3065  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3066  SDValue Load =
3067  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3068 
3069  // Add the thread pointer.
3070  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3071  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3072  }
3073 
3074  // Generate a sequence for accessing the address relative to the thread
3075  // pointer, with the appropriate adjustment for the thread pointer offset.
3076  // This generates the pattern
3077  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3078  SDValue AddrHi =
3079  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3080  SDValue AddrAdd =
3081  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3082  SDValue AddrLo =
3083  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3084 
3085  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3086  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3087  SDValue MNAdd = SDValue(
3088  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3089  0);
3090  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3091 }
3092 
3093 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3094  SelectionDAG &DAG) const {
3095  SDLoc DL(N);
3096  EVT Ty = getPointerTy(DAG.getDataLayout());
3097  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3098  const GlobalValue *GV = N->getGlobal();
3099 
3100  // Use a PC-relative addressing mode to access the global dynamic GOT address.
3101  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3102  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3103  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3104  SDValue Load =
3105  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3106 
3107  // Prepare argument list to generate call.
3108  ArgListTy Args;
3109  ArgListEntry Entry;
3110  Entry.Node = Load;
3111  Entry.Ty = CallTy;
3112  Args.push_back(Entry);
3113 
3114  // Setup call to __tls_get_addr.
3115  TargetLowering::CallLoweringInfo CLI(DAG);
3116  CLI.setDebugLoc(DL)
3117  .setChain(DAG.getEntryNode())
3118  .setLibCallee(CallingConv::C, CallTy,
3119  DAG.getExternalSymbol("__tls_get_addr", Ty),
3120  std::move(Args));
3121 
3122  return LowerCallTo(CLI).first;
3123 }
3124 
3125 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3126  SelectionDAG &DAG) const {
3127  SDLoc DL(Op);
3128  EVT Ty = Op.getValueType();
3129  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3130  int64_t Offset = N->getOffset();
3131  MVT XLenVT = Subtarget.getXLenVT();
3132 
3133  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3134 
3135  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3136  CallingConv::GHC)
3137  report_fatal_error("In GHC calling convention TLS is not supported");
3138 
3139  SDValue Addr;
3140  switch (Model) {
3141  case TLSModel::LocalExec:
3142  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3143  break;
3144  case TLSModel::InitialExec:
3145  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3146  break;
3147  case TLSModel::LocalDynamic:
3148  case TLSModel::GeneralDynamic:
3149  Addr = getDynamicTLSAddr(N, DAG);
3150  break;
3151  }
3152 
3153  // In order to maximise the opportunity for common subexpression elimination,
3154  // emit a separate ADD node for the global address offset instead of folding
3155  // it in the global address node. Later peephole optimisations may choose to
3156  // fold it back in when profitable.
3157  if (Offset != 0)
3158  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3159  DAG.getConstant(Offset, DL, XLenVT));
3160  return Addr;
3161 }
3162 
3163 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3164  SDValue CondV = Op.getOperand(0);
3165  SDValue TrueV = Op.getOperand(1);
3166  SDValue FalseV = Op.getOperand(2);
3167  SDLoc DL(Op);
3168  MVT VT = Op.getSimpleValueType();
3169  MVT XLenVT = Subtarget.getXLenVT();
3170 
3171  // Lower vector SELECTs to VSELECTs by splatting the condition.
3172  if (VT.isVector()) {
3173  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3174  SDValue CondSplat = VT.isScalableVector()
3175  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3176  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3177  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3178  }
3179 
3180  // If the result type is XLenVT and CondV is the output of a SETCC node
3181  // which also operated on XLenVT inputs, then merge the SETCC node into the
3182  // lowered RISCVISD::SELECT_CC to take advantage of the integer
3183  // compare+branch instructions. i.e.:
3184  // (select (setcc lhs, rhs, cc), truev, falsev)
3185  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3186  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3187  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3188  SDValue LHS = CondV.getOperand(0);
3189  SDValue RHS = CondV.getOperand(1);
3190  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3191  ISD::CondCode CCVal = CC->get();
3192 
3193  // Special case for a select of 2 constants that have a difference of 1.
3194  // Normally this is done by DAGCombine, but if the select is introduced by
3195  // type legalization or op legalization, we miss it. Restricting to SETLT
3196  // case for now because that is what signed saturating add/sub need.
3197  // FIXME: We don't need the condition to be SETLT or even a SETCC,
3198  // but we would probably want to swap the true/false values if the condition
3199  // is SETGE/SETLE to avoid an XORI.
3200  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3201  CCVal == ISD::SETLT) {
3202  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3203  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3204  if (TrueVal - 1 == FalseVal)
3205  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3206  if (TrueVal + 1 == FalseVal)
3207  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3208  }
3209 
3210  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3211 
3212  SDValue TargetCC = DAG.getCondCode(CCVal);
3213  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3214  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3215  }
3216 
3217  // Otherwise:
3218  // (select condv, truev, falsev)
3219  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3220  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3221  SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3222 
3223  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3224 
3225  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3226 }
3227 
3228 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3229  SDValue CondV = Op.getOperand(1);
3230  SDLoc DL(Op);
3231  MVT XLenVT = Subtarget.getXLenVT();
3232 
3233  if (CondV.getOpcode() == ISD::SETCC &&
3234  CondV.getOperand(0).getValueType() == XLenVT) {
3235  SDValue LHS = CondV.getOperand(0);
3236  SDValue RHS = CondV.getOperand(1);
3237  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3238 
3239  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3240 
3241  SDValue TargetCC = DAG.getCondCode(CCVal);
3242  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3243  LHS, RHS, TargetCC, Op.getOperand(2));
3244  }
3245 
3246  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3247  CondV, DAG.getConstant(0, DL, XLenVT),
3248  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3249 }
3250 
3251 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3252  MachineFunction &MF = DAG.getMachineFunction();
3253  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3254 
3255  SDLoc DL(Op);
3256  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3257  getPointerTy(MF.getDataLayout()));
3258 
3259  // vastart just stores the address of the VarArgsFrameIndex slot into the
3260  // memory location argument.
3261  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3262  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3263  MachinePointerInfo(SV));
3264 }
3265 
3266 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3267  SelectionDAG &DAG) const {
3268  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3269  MachineFunction &MF = DAG.getMachineFunction();
3270  MachineFrameInfo &MFI = MF.getFrameInfo();
3271  MFI.setFrameAddressIsTaken(true);
3272  Register FrameReg = RI.getFrameRegister(MF);
3273  int XLenInBytes = Subtarget.getXLen() / 8;
3274 
3275  EVT VT = Op.getValueType();
3276  SDLoc DL(Op);
3277  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3278  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3279  while (Depth--) {
3280  int Offset = -(XLenInBytes * 2);
3281  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3282  DAG.getIntPtrConstant(Offset, DL));
3283  FrameAddr =
3284  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3285  }
3286  return FrameAddr;
3287 }
3288 
3289 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3290  SelectionDAG &DAG) const {
3291  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3292  MachineFunction &MF = DAG.getMachineFunction();
3293  MachineFrameInfo &MFI = MF.getFrameInfo();
3294  MFI.setReturnAddressIsTaken(true);
3295  MVT XLenVT = Subtarget.getXLenVT();
3296  int XLenInBytes = Subtarget.getXLen() / 8;
3297 
3298  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3299  return SDValue();
3300 
3301  EVT VT = Op.getValueType();
3302  SDLoc DL(Op);
3303  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3304  if (Depth) {
3305  int Off = -XLenInBytes;
3306  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3307  SDValue Offset = DAG.getConstant(Off, DL, VT);
3308  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3309  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3310  MachinePointerInfo());
3311  }
3312 
3313  // Return the value of the return address register, marking it an implicit
3314  // live-in.
3315  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3316  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3317 }
3318 
3319 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3320  SelectionDAG &DAG) const {
3321  SDLoc DL(Op);
3322  SDValue Lo = Op.getOperand(0);
3323  SDValue Hi = Op.getOperand(1);
3324  SDValue Shamt = Op.getOperand(2);
3325  EVT VT = Lo.getValueType();
3326 
3327  // if Shamt-XLEN < 0: // Shamt < XLEN
3328  // Lo = Lo << Shamt
3329  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3330  // else:
3331  // Lo = 0
3332  // Hi = Lo << (Shamt-XLEN)
3333 
3334  SDValue Zero = DAG.getConstant(0, DL, VT);
3335  SDValue One = DAG.getConstant(1, DL, VT);
3336  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3337  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3338  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3339  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3340 
3341  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3342  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3343  SDValue ShiftRightLo =
3344  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3345  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3346  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3347  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3348 
3349  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3350 
3351  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3352  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3353 
3354  SDValue Parts[2] = {Lo, Hi};
3355  return DAG.getMergeValues(Parts, DL);
3356 }
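// Note (editor annotation, not part of the upstream source): a worked
// instance of the expansion above on RV32: for Shamt = 40 the else arm is
// taken (Shamt - XLEN = 8 >= 0), giving Lo = 0 and Hi = Lo << 8; for
// Shamt = 4 the first arm gives Lo = Lo << 4 and
// Hi = (Hi << 4) | (Lo >>u 28).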
3357 
3358 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3359  bool IsSRA) const {
3360  SDLoc DL(Op);
3361  SDValue Lo = Op.getOperand(0);
3362  SDValue Hi = Op.getOperand(1);
3363  SDValue Shamt = Op.getOperand(2);
3364  EVT VT = Lo.getValueType();
3365 
3366  // SRA expansion:
3367  // if Shamt-XLEN < 0: // Shamt < XLEN
3368  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3369  // Hi = Hi >>s Shamt
3370  // else:
3371  // Lo = Hi >>s (Shamt-XLEN);
3372  // Hi = Hi >>s (XLEN-1)
3373  //
3374  // SRL expansion:
3375  // if Shamt-XLEN < 0: // Shamt < XLEN
3376  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3377  // Hi = Hi >>u Shamt
3378  // else:
3379  // Lo = Hi >>u (Shamt-XLEN);
3380  // Hi = 0;
3381 
3382  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3383 
3384  SDValue Zero = DAG.getConstant(0, DL, VT);
3385  SDValue One = DAG.getConstant(1, DL, VT);
3386  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3387  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3388  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3389  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3390 
3391  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3392  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3393  SDValue ShiftLeftHi =
3394  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3395  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3396  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3397  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3398  SDValue HiFalse =
3399  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3400 
3401  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3402 
3403  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3404  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3405 
3406  SDValue Parts[2] = {Lo, Hi};
3407  return DAG.getMergeValues(Parts, DL);
3408 }
3409 
3410 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3411 // legal equivalently-sized i8 type, so we can use that as a go-between.
3412 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3413  SelectionDAG &DAG) const {
3414  SDLoc DL(Op);
3415  MVT VT = Op.getSimpleValueType();
3416  SDValue SplatVal = Op.getOperand(0);
3417  // All-zeros or all-ones splats are handled specially.
3418  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3419  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3420  return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3421  }
3422  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3423  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3424  return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3425  }
3426  MVT XLenVT = Subtarget.getXLenVT();
3427  assert(SplatVal.getValueType() == XLenVT &&
3428  "Unexpected type for i1 splat value");
3429  MVT InterVT = VT.changeVectorElementType(MVT::i8);
3430  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3431  DAG.getConstant(1, DL, XLenVT));
3432  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3433  SDValue Zero = DAG.getConstant(0, DL, InterVT);
3434  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3435 }
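// Note (editor annotation, not part of the upstream source): constant
// all-ones / all-zeros i1 splats map directly onto vmset.m / vmclr.m above;
// a variable i1 splat is masked to its low bit, splatted as i8 and compared
// against zero, which isel can typically select as a vector move followed
// by a vmsne comparison.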
3436 
3437 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3438 // illegal (currently only vXi64 RV32).
3439 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3440 // them to SPLAT_VECTOR_I64
3441 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3442  SelectionDAG &DAG) const {
3443  SDLoc DL(Op);
3444  MVT VecVT = Op.getSimpleValueType();
3445  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3446  "Unexpected SPLAT_VECTOR_PARTS lowering");
3447 
3448  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3449  SDValue Lo = Op.getOperand(0);
3450  SDValue Hi = Op.getOperand(1);
3451 
3452  if (VecVT.isFixedLengthVector()) {
3453  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3454  SDLoc DL(Op);
3455  SDValue Mask, VL;
3456  std::tie(Mask, VL) =
3457  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3458 
3459  SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3460  return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3461  }
3462 
3463  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3464  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3465  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3466  // If every bit of the Hi constant equals Lo's sign bit, lower this as a custom
3467  // node in order to try and match RVV vector/scalar instructions.
3468  if ((LoC >> 31) == HiC)
3469  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3470  }
3471 
3472  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3473  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3474  isa<ConstantSDNode>(Hi.getOperand(1)) &&
3475  Hi.getConstantOperandVal(1) == 31)
3476  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3477 
3478  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
3479  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3480  DAG.getRegister(RISCV::X0, MVT::i64));
3481 }
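// A concrete example of the constant path: splatting the i64 value -1 on RV32
// arrives here as Lo = Hi = 0xffffffff; (LoC >> 31) == HiC holds, so the whole
// splat collapses to a single SPLAT_VECTOR_I64 of Lo that the vector/scalar
// (.vx/.vi) patterns can match. A pair like Lo = 0, Hi = 1 fails both checks
// and takes the SPLAT_VECTOR_SPLIT_I64_VL stack-store fallback instead.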
3482 
3483 // Custom-lower extensions from mask vectors by using a vselect either with 1
3484 // for zero/any-extension or -1 for sign-extension:
3485 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
3486 // Note that any-extension is lowered identically to zero-extension.
3487 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
3488  int64_t ExtTrueVal) const {
3489  SDLoc DL(Op);
3490  MVT VecVT = Op.getSimpleValueType();
3491  SDValue Src = Op.getOperand(0);
3492  // Only custom-lower extensions from mask types
3493  assert(Src.getValueType().isVector() &&
3494  Src.getValueType().getVectorElementType() == MVT::i1);
3495 
3496  MVT XLenVT = Subtarget.getXLenVT();
3497  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
3498  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
3499 
3500  if (VecVT.isScalableVector()) {
3501  // Be careful not to introduce illegal scalar types at this stage, and be
3502  // careful about splatting constants: on RV32, vXi64 SPLAT_VECTOR is
3503  // illegal and must be expanded. Since we know that the constants are
3504  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
3505  bool IsRV32E64 =
3506  !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
3507 
3508  if (!IsRV32E64) {
3509  SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
3510  SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
3511  } else {
3512  SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
3513  SplatTrueVal =
3514  DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
3515  }
3516 
3517  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
3518  }
3519 
3520  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3521  MVT I1ContainerVT =
3522  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3523 
3524  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
3525 
3526  SDValue Mask, VL;
3527  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3528 
3529  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
3530  SplatTrueVal =
3531  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
3532  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
3533  SplatTrueVal, SplatZero, VL);
3534 
3535  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
3536 }
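// For example, (vXi32 = sext vXi1 %m) becomes, roughly,
//   (vselect %m, splat(-1), splat(0))
// and the fixed-length path builds the same thing out of VMV_V_X_VL and
// VSELECT_VL; zero- and any-extension only differ in using +1 rather than -1
// as ExtTrueVal.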
3537 
3538 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3539  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3540  MVT ExtVT = Op.getSimpleValueType();
3541  // Only custom-lower extensions from fixed-length vector types.
3542  if (!ExtVT.isFixedLengthVector())
3543  return Op;
3544  MVT VT = Op.getOperand(0).getSimpleValueType();
3545  // Grab the canonical container type for the extended type. Infer the smaller
3546  // type from that to ensure the same number of vector elements, as we know
3547  // the LMUL will be sufficient to hold the smaller type.
3548  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
3549  // Get the extended container type manually to ensure the same number of
3550  // vector elements between source and dest.
3551  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3552  ContainerExtVT.getVectorElementCount());
3553 
3554  SDValue Op1 =
3555  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3556 
3557  SDLoc DL(Op);
3558  SDValue Mask, VL;
3559  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3560 
3561  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3562 
3563  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3564 }
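// As an illustration, for a v4i16 -> v4i32 sign-extension the v4i32 container
// is chosen first and an i16 container with the same element count is derived
// for the source, so the VSEXT_VL/VZEXT_VL node extends lane-for-lane under a
// VL of 4 (the fixed vector's element count).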
3565 
3566 // Custom-lower truncations from vectors to mask vectors by using a mask and a
3567 // setcc operation:
3568 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
3569 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
3570  SelectionDAG &DAG) const {
3571  SDLoc DL(Op);
3572  EVT MaskVT = Op.getValueType();
3573  // Only expect to custom-lower truncations to mask types
3574  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
3575  "Unexpected type for vector mask lowering");
3576  SDValue Src = Op.getOperand(0);
3577  MVT VecVT = Src.getSimpleValueType();
3578 
3579  // If this is a fixed vector, we need to convert it to a scalable vector.
3580  MVT ContainerVT = VecVT;
3581  if (VecVT.isFixedLengthVector()) {
3582  ContainerVT = getContainerForFixedLengthVector(VecVT);
3583  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3584  }
3585 
3586  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
3587  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
3588 
3589  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
3590  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
3591 
3592  if (VecVT.isScalableVector()) {
3593  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
3594  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
3595  }
3596 
3597  SDValue Mask, VL;
3598  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3599 
3600  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3601  SDValue Trunc =
3602  DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
3603  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
3604  DAG.getCondCode(ISD::SETNE), Mask, VL);
3605  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
3606 }
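// The scalable path above is the plain (setcc (and vec, 1), 0, ne) expansion;
// the fixed-length path expresses the same idea with the VL-carrying nodes and
// would come out looking roughly like (placeholder registers):
//   vand.vi  v8, v8, 1      ; keep only bit 0 of each element
//   vmsne.vi v0, v8, 0      ; mask lane is 1 iff that bit was set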
3607 
3608 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
3609 // first position of a vector, and that vector is slid up to the insert index.
3610 // By limiting the active vector length to index+1 and merging with the
3611 // original vector (with an undisturbed tail policy for elements >= VL), we
3612 // achieve the desired result of leaving all elements untouched except the one
3613 // at VL-1, which is replaced with the desired value.
3614 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3615  SelectionDAG &DAG) const {
3616  SDLoc DL(Op);
3617  MVT VecVT = Op.getSimpleValueType();
3618  SDValue Vec = Op.getOperand(0);
3619  SDValue Val = Op.getOperand(1);
3620  SDValue Idx = Op.getOperand(2);
3621 
3622  if (VecVT.getVectorElementType() == MVT::i1) {
3623  // FIXME: For now we just promote to an i8 vector and insert into that,
3624  // but this is probably not optimal.
3625  MVT WideVT = VecVT.changeVectorElementType(MVT::i8);
3626  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3627  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
3628  return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
3629  }
3630 
3631  MVT ContainerVT = VecVT;
3632  // If the operand is a fixed-length vector, convert to a scalable one.
3633  if (VecVT.isFixedLengthVector()) {
3634  ContainerVT = getContainerForFixedLengthVector(VecVT);
3635  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3636  }
3637 
3638  MVT XLenVT = Subtarget.getXLenVT();
3639 
3640  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3641  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
3642  // Even i64-element vectors on RV32 can be lowered without scalar
3643  // legalization if the value is a sign-extended 32-bit constant, i.e. the
3644  // most-significant 32 bits are just the sign-extension of the lower 32 bits.
3645  // TODO: We could also catch sign extensions of a 32-bit value.
3646  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
3647  const auto *CVal = cast<ConstantSDNode>(Val);
3648  if (isInt<32>(CVal->getSExtValue())) {
3649  IsLegalInsert = true;
3650  Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3651  }
3652  }
3653 
3654  SDValue Mask, VL;
3655  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3656 
3657  SDValue ValInVec;
3658 
3659  if (IsLegalInsert) {
3660  unsigned Opc =
3661  VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
3662  if (isNullConstant(Idx)) {
3663  Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
3664  if (!VecVT.isFixedLengthVector())
3665  return Vec;
3666  return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
3667  }
3668  ValInVec =
3669  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
3670  } else {
3671  // On RV32, i64-element vectors must be specially handled to place the
3672  // value at element 0, by using two vslide1up instructions in sequence on
3673  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
3674  // this.
3675  SDValue One = DAG.getConstant(1, DL, XLenVT);
3676  SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
3677  SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
3678  MVT I32ContainerVT =
3679  MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
3680  SDValue I32Mask =
3681  getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
3682  // Limit the active VL to two.
3683  SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
3684  // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
3685  // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
3686  ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
3687  InsertI64VL);
3688  // First slide in the hi value, then slide the lo in underneath it.
3689  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3690  ValHi, I32Mask, InsertI64VL);
3691  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3692  ValLo, I32Mask, InsertI64VL);
3693  // Bitcast back to the right container type.
3694  ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
3695  }
3696 
3697  // Now that the value is in a vector, slide it into position.
3698  SDValue InsertVL =
3699  DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
3700  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3701  ValInVec, Idx, Mask, InsertVL);
3702  if (!VecVT.isFixedLengthVector())
3703  return Slideup;
3704  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3705 }
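// Tying the pieces together for an integer insert at a non-zero index, the
// intended instruction shape is roughly (placeholder registers; the exact
// vsetvli policy bits are chosen later during instruction selection):
//   vmv.s.x     v9, aVal             ; value into element 0 of a scratch vector
//   vsetvli     zero, aIdxPlus1, ... ; VL = Idx + 1, tail undisturbed
//   vslideup.vx v8, v9, aIdx         ; slide it up so it lands in lane Idx
// leaving every lane of the original vector other than lane Idx untouched.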
3706 
3707 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
3708 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
3709 // types this is done using VMV_X_S to allow us to glean information about the
3710 // sign bits of the result.
3711 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3712  SelectionDAG &DAG) const {
3713  SDLoc DL(Op);
3714  SDValue Idx = Op.getOperand(1);
3715  SDValue Vec = Op.getOperand(0);
3716  EVT EltVT = Op.getValueType();
3717  MVT VecVT = Vec.getSimpleValueType();
3718  MVT XLenVT = Subtarget.getXLenVT();
3719 
3720  if (VecVT.getVectorElementType() == MVT::i1) {
3721  // FIXME: For now we just promote to an i8 vector and extract from that,
3722  // but this is probably not optimal.
3723  MVT WideVT = VecVT.changeVectorElementType(MVT::i8);
3724  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3725  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
3726  }
3727 
3728  // If this is a fixed vector, we need to convert it to a scalable vector.
3729  MVT ContainerVT = VecVT;
3730  if (VecVT.isFixedLengthVector()) {
3731  ContainerVT = getContainerForFixedLengthVector(VecVT);
3732  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3733  }
3734 
3735  // If the index is 0, the vector is already in the right position.
3736  if (!isNullConstant(Idx)) {
3737  // Use a VL of 1 to avoid processing more elements than we need.
3738  SDValue VL = DAG.getConstant(1, DL, XLenVT);
3739  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3740  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3741  Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3742  DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3743  }
3744 
3745  if (!EltVT.isInteger()) {
3746  // Floating-point extracts are handled in TableGen.
3747  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
3748  DAG.getConstant(0, DL, XLenVT));
3749  }
3750 
3751  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3752  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
3753 }
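// For an integer element at a non-zero index the net effect is roughly
// (placeholder registers):
//   vsetivli      zero, 1, ...    ; only one lane needs to be processed
//   vslidedown.vx v8, v8, aIdx    ; bring lane Idx down to lane 0
//   vmv.x.s       a0, v8          ; read lane 0 into a GPR, sign-extended
// with the final TRUNCATE merely narrowing the XLenVT result back to EltVT.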
3754 
3755 // Some RVV intrinsics may claim that they want an integer operand to be
3756 // promoted or expanded.
3757 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
3758  const RISCVSubtarget &Subtarget) {
3759  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3760  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3761  "Unexpected opcode");
3762 
3763  if (!Subtarget.hasStdExtV())
3764  return SDValue();
3765 
3766  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3767  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
3768  SDLoc DL(Op);
3769 
3770  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
3771  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3772  if (!II || !II->SplatOperand)
3773  return SDValue();
3774 
3775  unsigned SplatOp = II->SplatOperand + HasChain;
3776  assert(SplatOp < Op.getNumOperands());
3777 
3778  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
3779  SDValue &ScalarOp = Operands[SplatOp];
3780  MVT OpVT = ScalarOp.getSimpleValueType();
3781  MVT XLenVT = Subtarget.getXLenVT();
3782 
3783  // If this isn't a scalar, or its type is XLenVT, we're done.
3784  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
3785  return SDValue();
3786 
3787  // Simplest case is that the operand needs to be promoted to XLenVT.
3788  if (OpVT.bitsLT(XLenVT)) {
3789  // If the operand is a constant, sign extend to increase our chances
3790  // of being able to use a .vi instruction. ANY_EXTEND would become a
3791  // zero extend and the simm5 check in isel would fail.
3792  // FIXME: Should we ignore the upper bits in isel instead?
3793  unsigned ExtOpc =
3794  isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3795  ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
3796  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3797  }
3798 
3799  // Use the previous operand to get the vXi64 VT. The result might be a mask
3800  // VT for compares. Using the previous operand assumes that the previous
3801  // operand will never have a smaller element size than a scalar operand and
3802  // that a widening operation never uses SEW=64.
3803  // NOTE: If this fails the below assert, we can probably just find the
3804  // element count from any operand or result and use it to construct the VT.
3805  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
3806  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
3807 
3808  // The more complex case is when the scalar is larger than XLenVT.
3809  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
3810  VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
3811 
3812  // If this is a sign-extended 32-bit constant, we can truncate it and rely
3813  // on the instruction to sign-extend since SEW>XLEN.
3814  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
3815  if (isInt<32>(CVal->getSExtValue())) {
3816  ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3817  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3818  }
3819  }
3820 
3821  // We need to convert the scalar to a splat vector.
3822  // FIXME: Can we implicitly truncate the scalar if it is known to
3823  // be sign extended?
3824  // VL should be the last operand.
3825  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
3826  assert(VL.getValueType() == XLenVT);
3827  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
3828  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3829 }
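// As a concrete example of what this helper does: an i8 scalar operand of an
// RVV intrinsic on RV64 is simply promoted to i64 here (sign-extended if it is
// a constant so a .vi form can still match), whereas an i64 scalar operand on
// RV32 that is not a sign-extended 32-bit value is rebuilt as a vector splat
// via splatSplitI64WithVL so the operation can proceed with a vector operand
// instead of an illegal scalar.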
3830 
3831 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3832  SelectionDAG &DAG) const {
3833  unsigned IntNo = Op.getConstantOperandVal(0);
3834  SDLoc DL(Op);
3835  MVT XLenVT = Subtarget.getXLenVT();
3836 
3837  switch (IntNo) {
3838  default:
3839  break; // Don't custom lower most intrinsics.
3840  case Intrinsic::thread_pointer: {
3841  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3842  return DAG.getRegister(RISCV::X4, PtrVT);
3843  }
3844  case Intrinsic::riscv_orc_b:
3845  // Lower to the GORCI encoding for orc.b.
3846  return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
3847  DAG.getConstant(7, DL, XLenVT));
3848  case Intrinsic::riscv_grev:
3849  case Intrinsic::riscv_gorc: {
3850  unsigned Opc =
3851  IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
3852  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3853  }
3854  case Intrinsic::riscv_shfl:
3855  case Intrinsic::riscv_unshfl: {
3856  unsigned Opc =
3857  IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
3858  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3859  }
3860  case Intrinsic::riscv_bcompress:
3861  case Intrinsic::riscv_bdecompress: {
3862  unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
3863  : RISCVISD::BDECOMPRESS;
3864  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3865  }
3866  case Intrinsic::riscv_vmv_x_s:
3867  assert(Op.getValueType() == XLenVT && "Unexpected VT!");
3868  return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
3869  Op.getOperand(1));
3870  case Intrinsic::riscv_vmv_v_x:
3871  return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
3872  Op.getSimpleValueType(), DL, DAG, Subtarget);
3873  case Intrinsic::riscv_vfmv_v_f:
3874  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
3875  Op.getOperand(1), Op.getOperand(2));
3876  case Intrinsic::riscv_vmv_s_x: {
3877  SDValue Scalar = Op.getOperand(2);
3878 
3879  if (Scalar.getValueType().bitsLE(XLenVT)) {
3880  Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
3881  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
3882  Op.getOperand(1), Scalar, Op.getOperand(3));
3883  }
3884 
3885  assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
3886 
3887  // This is an i64 value that lives in two scalar registers. We have to
3888  // insert this in a convoluted way. First we build a vXi64 splat containing
3889  // the two values that we assemble using some bit math. Next we'll use
3890  // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
3891  // to merge element 0 from our splat into the source vector.
3892  // FIXME: This is probably not the best way to do this, but it is
3893  // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
3894  // point.
3895  // sw lo, (a0)
3896  // sw hi, 4(a0)
3897  // vlse vX, (a0)
3898  //
3899  // vid.v vVid
3900  // vmseq.vx mMask, vVid, 0
3901  // vmerge.vvm vDest, vSrc, vVal, mMask
3902  MVT VT = Op.getSimpleValueType();
3903  SDValue Vec = Op.getOperand(1);
3904  SDValue VL = Op.getOperand(3);
3905 
3906  SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
3907  SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
3908  DAG.getConstant(0, DL, MVT::i32), VL);
3909 
3910  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3911  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3912  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3913  SDValue SelectCond =
3914  DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
3915  DAG.getCondCode(ISD::SETEQ), Mask, VL);
3916  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
3917  Vec, VL);
3918  }
3919  case Intrinsic::riscv_vslide1up:
3920  case Intrinsic::riscv_vslide1down:
3921  case Intrinsic::riscv_vslide1up_mask:
3922  case Intrinsic::riscv_vslide1down_mask: {
3923  // We need to special-case these when the scalar is larger than XLen.
3924  unsigned NumOps = Op.getNumOperands();
3925  bool IsMasked = NumOps == 7;
3926  unsigned OpOffset = IsMasked ? 1 : 0;
3927  SDValue Scalar = Op.getOperand(2 + OpOffset);
3928  if (Scalar.getValueType().bitsLE(XLenVT))
3929  break;
3930 
3931  // Splatting a sign extended constant is fine.
3932  if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
3933  if (isInt<32>(CVal->getSExtValue()))
3934  break;
3935 
3936  MVT VT = Op.getSimpleValueType();
3937  assert(VT.getVectorElementType() == MVT::i64 &&
3938  Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3939 
3940  // Convert the vector source to the equivalent nxvXi32 vector.
3941  MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3942  SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3943 
3944  SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3945  DAG.getConstant(0, DL, XLenVT));
3946  SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3947  DAG.getConstant(1, DL, XLenVT));
3948 
3949  // Double the VL since we halved SEW.
3950  SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
3951  SDValue I32VL =
3952  DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3953 
3954  MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3955  SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3956 
3957  // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3958  // instructions.
3959  if (IntNo == Intrinsic::riscv_vslide1up ||
3960  IntNo == Intrinsic::riscv_vslide1up_mask) {
3961  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3962  I32Mask, I32VL);
3963  Vec = DAG.