1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
29 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/IntrinsicsRISCV.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/KnownBits.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46  const RISCVSubtarget &STI)
47  : TargetLowering(TM), Subtarget(STI) {
48 
49  if (Subtarget.isRV32E())
50  report_fatal_error("Codegen not yet implemented for RV32E");
51 
52  RISCVABI::ABI ABI = Subtarget.getTargetABI();
53  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56  !Subtarget.hasStdExtF()) {
57  errs() << "Hard-float 'f' ABI can't be used for a target that "
58  "doesn't support the F instruction set extension (ignoring "
59  "target-abi)\n";
61  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62  !Subtarget.hasStdExtD()) {
63  errs() << "Hard-float 'd' ABI can't be used for a target that "
64  "doesn't support the D instruction set extension (ignoring "
65  "target-abi)\n";
67  }
68 
69  switch (ABI) {
70  default:
71  report_fatal_error("Don't know how to lower this ABI");
75  case RISCVABI::ABI_LP64:
78  break;
79  }
80 
81  MVT XLenVT = Subtarget.getXLenVT();
82 
83  // Set up the register classes.
84  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86  if (Subtarget.hasStdExtZfh())
87  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88  if (Subtarget.hasStdExtF())
89  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90  if (Subtarget.hasStdExtD())
91  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93  static const MVT::SimpleValueType BoolVecVTs[] = {
96  static const MVT::SimpleValueType IntVecVTs[] = {
102  static const MVT::SimpleValueType F16VecVTs[] = {
105  static const MVT::SimpleValueType F32VecVTs[] = {
107  static const MVT::SimpleValueType F64VecVTs[] = {
109 
110  if (Subtarget.hasStdExtV()) {
111  auto addRegClassForRVV = [this](MVT VT) {
112  unsigned Size = VT.getSizeInBits().getKnownMinValue();
113  assert(Size <= 512 && isPowerOf2_32(Size));
114  const TargetRegisterClass *RC;
115  if (Size <= 64)
116  RC = &RISCV::VRRegClass;
117  else if (Size == 128)
118  RC = &RISCV::VRM2RegClass;
119  else if (Size == 256)
120  RC = &RISCV::VRM4RegClass;
121  else
122  RC = &RISCV::VRM8RegClass;
123 
124  addRegisterClass(VT, RC);
125  };
126 
127  for (MVT VT : BoolVecVTs)
128  addRegClassForRVV(VT);
129  for (MVT VT : IntVecVTs)
130  addRegClassForRVV(VT);
131 
132  if (Subtarget.hasStdExtZfh())
133  for (MVT VT : F16VecVTs)
134  addRegClassForRVV(VT);
135 
136  if (Subtarget.hasStdExtF())
137  for (MVT VT : F32VecVTs)
138  addRegClassForRVV(VT);
139 
140  if (Subtarget.hasStdExtD())
141  for (MVT VT : F64VecVTs)
142  addRegClassForRVV(VT);
143 
144  if (Subtarget.useRVVForFixedLengthVectors()) {
145  auto addRegClassForFixedVectors = [this](MVT VT) {
146  MVT ContainerVT = getContainerForFixedLengthVector(VT);
147  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
148  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
149  addRegisterClass(VT, TRI.getRegClass(RCID));
150  };
152  if (useRVVForFixedLengthVectorVT(VT))
153  addRegClassForFixedVectors(VT);
154 
156  if (useRVVForFixedLengthVectorVT(VT))
157  addRegClassForFixedVectors(VT);
158  }
159  }
160 
161  // Compute derived properties from the register classes.
163 
165 
166  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
167  setLoadExtAction(N, XLenVT, MVT::i1, Promote);
168 
169  // TODO: add all necessary setOperationAction calls.
171 
176 
179 
184 
186  if (!Subtarget.hasStdExtZbb()) {
189  }
190 
191  if (Subtarget.is64Bit()) {
197 
202  }
203 
204  if (!Subtarget.hasStdExtM()) {
212  } else {
213  if (Subtarget.is64Bit()) {
216 
226  } else {
228  }
229  }
230 
235 
239 
240  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
241  if (Subtarget.is64Bit()) {
244  }
245  } else {
248  }
249 
250  if (Subtarget.hasStdExtZbp()) {
251  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
252  // more combining.
256  // BSWAP i8 doesn't exist.
259 
260  if (Subtarget.is64Bit()) {
263  }
264  } else {
265  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
266  // pattern match it directly in isel.
268  Subtarget.hasStdExtZbb() ? Legal : Expand);
269  }
270 
271  if (Subtarget.hasStdExtZbb()) {
276 
277  if (Subtarget.is64Bit()) {
282  }
283  } else {
287  }
288 
289  if (Subtarget.hasStdExtZbt()) {
293 
294  if (Subtarget.is64Bit()) {
297  }
298  } else {
300  }
301 
302  ISD::CondCode FPCCToExpand[] = {
306 
307  ISD::NodeType FPOpToExpand[] = {
310 
311  if (Subtarget.hasStdExtZfh())
313 
314  if (Subtarget.hasStdExtZfh()) {
321  for (auto CC : FPCCToExpand)
326  for (auto Op : FPOpToExpand)
328  }
329 
330  if (Subtarget.hasStdExtF()) {
337  for (auto CC : FPCCToExpand)
342  for (auto Op : FPOpToExpand)
346  }
347 
348  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
350 
351  if (Subtarget.hasStdExtD()) {
358  for (auto CC : FPCCToExpand)
365  for (auto Op : FPOpToExpand)
369  }
370 
371  if (Subtarget.is64Bit()) {
376  }
377 
378  if (Subtarget.hasStdExtF()) {
381  }
382 
387 
389 
390  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
391  // Unfortunately this can't be determined just from the ISA naming string.
393  Subtarget.is64Bit() ? Legal : Custom);
394 
398  if (Subtarget.is64Bit())
400 
401  if (Subtarget.hasStdExtA()) {
404  } else {
406  }
407 
409 
410  if (Subtarget.hasStdExtV()) {
412 
414 
415  // RVV intrinsics may have illegal operands.
416  // We also need to custom legalize vmv.x.s.
421  if (Subtarget.is64Bit()) {
423  } else {
426  }
427 
429 
430  static unsigned IntegerVPOps[] = {
431  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
432  ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,
433  ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
434 
435  static unsigned FloatingPointVPOps[] = {ISD::VP_FADD, ISD::VP_FSUB,
436  ISD::VP_FMUL, ISD::VP_FDIV};
437 
438  if (!Subtarget.is64Bit()) {
439  // We must custom-lower certain vXi64 operations on RV32 due to the vector
440  // element type being illegal.
443 
452  }
453 
454  for (MVT VT : BoolVecVTs) {
456 
457  // Mask VTs are custom-expanded into a series of standard nodes
462 
465 
469 
473 
474  // RVV has native int->float & float->int conversions where the
475  // element type sizes are within one power-of-two of each other. Any
476  // wider distances between type sizes have to be lowered as sequences
477  // which progressively narrow the gap in stages.
482 
483  // Expand all extending loads to types larger than this, and truncating
484  // stores from types larger than this.
485  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
486  setTruncStoreAction(OtherVT, VT, Expand);
487  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
488  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
489  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
490  }
491  }
492 
493  for (MVT VT : IntVecVTs) {
496 
501 
504 
505  // Custom-lower extensions and truncations from/to mask types.
509 
510  // RVV has native int->float & float->int conversions where the
511  // element type sizes are within one power-of-two of each other. Any
512  // wider distances between type sizes have to be lowered as sequences
513  // which progressively narrow the gap in stages.
518 
523 
524  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
525  // nodes which truncate by one power of two at a time.
527 
528  // Custom-lower insert/extract operations to simplify patterns.
531 
532  // Custom-lower reduction operations to set up the corresponding custom
533  // nodes' operands.
542 
543  for (unsigned VPOpc : IntegerVPOps)
544  setOperationAction(VPOpc, VT, Custom);
545 
548 
553 
557 
560 
563 
564  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
565  setTruncStoreAction(VT, OtherVT, Expand);
566  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
567  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
568  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
569  }
570  }
571 
572  // Expand various CCs to best match the RVV ISA, which natively supports UNE
573  // but no other unordered comparisons, and supports all ordered comparisons
574  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
575  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
576  // and we pattern-match those back to the "original", swapping operands once
577  // more. This way we catch both operations and both "vf" and "fv" forms with
578  // fewer patterns.
579  ISD::CondCode VFPCCToExpand[] = {
583  };
584 
585  // Sets common operation actions on RVV floating-point vector types.
586  const auto SetCommonVFPActions = [&](MVT VT) {
588  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
589  // sizes are within one power-of-two of each other. Therefore conversions
590  // between vXf16 and vXf64 must be lowered as sequences which convert via
591  // vXf32.
594  // Custom-lower insert/extract operations to simplify patterns.
597  // Expand various condition codes (explained above).
598  for (auto CC : VFPCCToExpand)
599  setCondCodeAction(CC, VT, Expand);
600 
603 
609 
612 
617 
620 
624 
626 
627  for (unsigned VPOpc : FloatingPointVPOps)
628  setOperationAction(VPOpc, VT, Custom);
629  };
630 
631  // Sets common extload/truncstore actions on RVV floating-point vector
632  // types.
633  const auto SetCommonVFPExtLoadTruncStoreActions =
634  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
635  for (auto SmallVT : SmallerVTs) {
636  setTruncStoreAction(VT, SmallVT, Expand);
637  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
638  }
639  };
640 
641  if (Subtarget.hasStdExtZfh())
642  for (MVT VT : F16VecVTs)
643  SetCommonVFPActions(VT);
644 
645  for (MVT VT : F32VecVTs) {
646  if (Subtarget.hasStdExtF())
647  SetCommonVFPActions(VT);
648  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
649  }
650 
651  for (MVT VT : F64VecVTs) {
652  if (Subtarget.hasStdExtD())
653  SetCommonVFPActions(VT);
654  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
655  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
656  }
657 
658  if (Subtarget.useRVVForFixedLengthVectors()) {
660  if (!useRVVForFixedLengthVectorVT(VT))
661  continue;
662 
663  // By default everything must be expanded.
664  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
666  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
667  setTruncStoreAction(VT, OtherVT, Expand);
668  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
669  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
670  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
671  }
672 
673  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
676 
679 
682 
685 
687 
689 
691 
693 
697 
702 
703  // Operations below are different between masks and other vectors.
704  if (VT.getVectorElementType() == MVT::i1) {
708  continue;
709  }
710 
711  // Use SPLAT_VECTOR to prevent type legalization from destroying the
712  // splats when type legalizing i64 scalar on RV32.
713  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
714  // improvements first.
715  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
718  }
719 
722 
740 
746 
749 
754 
757 
761 
762  // Custom-lower reduction operations to set up the corresponding custom
763  // nodes' operands.
769 
770  for (unsigned VPOpc : IntegerVPOps)
771  setOperationAction(VPOpc, VT, Custom);
772  }
773 
775  if (!useRVVForFixedLengthVectorVT(VT))
776  continue;
777 
778  // By default everything must be expanded.
779  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
781  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
782  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
783  setTruncStoreAction(VT, OtherVT, Expand);
784  }
785 
786  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
789 
794 
812 
815 
816  for (auto CC : VFPCCToExpand)
817  setCondCodeAction(CC, VT, Expand);
818 
822 
824 
829 
830  for (unsigned VPOpc : FloatingPointVPOps)
831  setOperationAction(VPOpc, VT, Custom);
832  }
833 
834  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
842  }
843  }
844 
845  // Function alignments.
846  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
847  setMinFunctionAlignment(FunctionAlignment);
848  setPrefFunctionAlignment(FunctionAlignment);
849 
851 
852  // Jumps are expensive, compared to logic
854 
855  // We can use any register for comparisons
857 
863  if (Subtarget.hasStdExtV()) {
870  }
871 }
872 
875  EVT VT) const {
876  if (!VT.isVector())
877  return getPointerTy(DL);
878  if (Subtarget.hasStdExtV() &&
879  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
882 }
883 
884 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
885  return Subtarget.getXLenVT();
886 }
887 
889  const CallInst &I,
890  MachineFunction &MF,
891  unsigned Intrinsic) const {
892  switch (Intrinsic) {
893  default:
894  return false;
895  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
896  case Intrinsic::riscv_masked_atomicrmw_add_i32:
897  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
898  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
899  case Intrinsic::riscv_masked_atomicrmw_max_i32:
900  case Intrinsic::riscv_masked_atomicrmw_min_i32:
901  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
902  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
903  case Intrinsic::riscv_masked_cmpxchg_i32: {
904  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
906  Info.memVT = MVT::getVT(PtrTy->getElementType());
907  Info.ptrVal = I.getArgOperand(0);
908  Info.offset = 0;
909  Info.align = Align(4);
912  return true;
913  }
914  }
915 }
916 
918  const AddrMode &AM, Type *Ty,
919  unsigned AS,
920  Instruction *I) const {
921  // No global is ever allowed as a base.
922  if (AM.BaseGV)
923  return false;
924 
925  // Require a 12-bit signed offset.
926  if (!isInt<12>(AM.BaseOffs))
927  return false;
928 
929  switch (AM.Scale) {
930  case 0: // "r+i" or just "i", depending on HasBaseReg.
931  break;
932  case 1:
933  if (!AM.HasBaseReg) // allow "r+i".
934  break;
935  return false; // disallow "r+r" or "r+r+i".
936  default:
937  return false;
938  }
939 
940  return true;
941 }
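// Illustrative reading of the hook above: a base register plus a 12-bit
// signed immediate ("r+i", matching RISC-V load/store addressing) is
// accepted, as is a bare immediate, while a global base, an offset outside
// [-2048, 2047], or any scaled/"r+r" form is rejected so that those
// components are materialized into registers separately.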
942 
944  return isInt<12>(Imm);
945 }
946 
948  return isInt<12>(Imm);
949 }
950 
951 // On RV32, 64-bit integers are split into their high and low parts and held
952 // in two different registers, so the trunc is free since the low register can
953 // just be used.
954 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
955  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
956  return false;
957  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
958  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
959  return (SrcBits == 64 && DestBits == 32);
960 }
961 
962 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
963  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
964  !SrcVT.isInteger() || !DstVT.isInteger())
965  return false;
966  unsigned SrcBits = SrcVT.getSizeInBits();
967  unsigned DestBits = DstVT.getSizeInBits();
968  return (SrcBits == 64 && DestBits == 32);
969 }
970 
972  // Zexts are free if they can be combined with a load.
973  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
974  EVT MemVT = LD->getMemoryVT();
975  if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
976  (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
977  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
978  LD->getExtensionType() == ISD::ZEXTLOAD))
979  return true;
980  }
981 
982  return TargetLowering::isZExtFree(Val, VT2);
983 }
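// For example, zero-extending the result of an i8 or i16 load is reported
// as free because the load can be selected as lbu/lhu (and an i32 load as
// lwu on RV64), so no separate extension instruction is needed.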
984 
986  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
987 }
988 
990  return Subtarget.hasStdExtZbb();
991 }
992 
994  return Subtarget.hasStdExtZbb();
995 }
996 
998  bool ForCodeSize) const {
999  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1000  return false;
1001  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1002  return false;
1003  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1004  return false;
1005  if (Imm.isNegZero())
1006  return false;
1007  return Imm.isZero();
1008 }
1009 
1011  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1012  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1013  (VT == MVT::f64 && Subtarget.hasStdExtD());
1014 }
1015 
1017  CallingConv::ID CC,
1018  EVT VT) const {
1019  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1020  // end up using a GPR but that will be decided based on ABI.
1021  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1022  return MVT::f32;
1023 
1025 }
1026 
1028  CallingConv::ID CC,
1029  EVT VT) const {
1030  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1031  // end up using a GPR but that will be decided based on ABI.
1032  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1033  return 1;
1034 
1036 }
1037 
1038 // Changes the condition code and swaps operands if necessary, so the SetCC
1039 // operation matches one of the comparisons supported directly by branches
1040 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1041 // with 1/-1.
1042 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1043  ISD::CondCode &CC, SelectionDAG &DAG) {
1044  // Convert X > -1 to X >= 0.
1045  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1046  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1047  CC = ISD::SETGE;
1048  return;
1049  }
1050  // Convert X < 1 to 0 >= X.
1051  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1052  RHS = LHS;
1053  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1054  CC = ISD::SETGE;
1055  return;
1056  }
1057 
1058  switch (CC) {
1059  default:
1060  break;
1061  case ISD::SETGT:
1062  case ISD::SETLE:
1063  case ISD::SETUGT:
1064  case ISD::SETULE:
1066  std::swap(LHS, RHS);
1067  break;
1068  }
1069 }
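// Worked examples of the rewrites above: (setcc X, -1, setgt) becomes
// (setcc X, 0, setge), and (setcc A, B, setle) has its operands swapped to
// (setcc B, A, setge), since the base ISA only provides BEQ/BNE/BLT/BGE/
// BLTU/BGEU as conditional branches.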
1070 
1071 // Return the RISC-V branch opcode that matches the given DAG integer
1072 // condition code. The CondCode must be one of those supported by the RISC-V
1073 // ISA (see translateSetCCForBranch).
1075  switch (CC) {
1076  default:
1077  llvm_unreachable("Unsupported CondCode");
1078  case ISD::SETEQ:
1079  return RISCV::BEQ;
1080  case ISD::SETNE:
1081  return RISCV::BNE;
1082  case ISD::SETLT:
1083  return RISCV::BLT;
1084  case ISD::SETGE:
1085  return RISCV::BGE;
1086  case ISD::SETULT:
1087  return RISCV::BLTU;
1088  case ISD::SETUGE:
1089  return RISCV::BGEU;
1090  }
1091 }
1092 
1094  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1095  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1096  if (VT.getVectorElementType() == MVT::i1)
1097  KnownSize *= 8;
1098 
1099  switch (KnownSize) {
1100  default:
1101  llvm_unreachable("Invalid LMUL.");
1102  case 8:
1103  return RISCVII::VLMUL::LMUL_F8;
1104  case 16:
1105  return RISCVII::VLMUL::LMUL_F4;
1106  case 32:
1107  return RISCVII::VLMUL::LMUL_F2;
1108  case 64:
1109  return RISCVII::VLMUL::LMUL_1;
1110  case 128:
1111  return RISCVII::VLMUL::LMUL_2;
1112  case 256:
1113  return RISCVII::VLMUL::LMUL_4;
1114  case 512:
1115  return RISCVII::VLMUL::LMUL_8;
1116  }
1117 }
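// Worked examples of the mapping above (i1 vectors count 8 bits per
// element for this purpose):
//   nxv1i8  -> known minimum size   8 bits -> LMUL_F8
//   nxv4i32 -> known minimum size 128 bits -> LMUL_2
//   nxv8i1  -> 8 x 8 bits           = 64 bits -> LMUL_1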
1118 
1120  switch (LMul) {
1121  default:
1122  llvm_unreachable("Invalid LMUL.");
1127  return RISCV::VRRegClassID;
1129  return RISCV::VRM2RegClassID;
1131  return RISCV::VRM4RegClassID;
1133  return RISCV::VRM8RegClassID;
1134  }
1135 }
1136 
1138  RISCVII::VLMUL LMUL = getLMUL(VT);
1139  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1143  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1144  "Unexpected subreg numbering");
1145  return RISCV::sub_vrm1_0 + Index;
1146  }
1147  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1148  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1149  "Unexpected subreg numbering");
1150  return RISCV::sub_vrm2_0 + Index;
1151  }
1152  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1153  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1154  "Unexpected subreg numbering");
1155  return RISCV::sub_vrm4_0 + Index;
1156  }
1157  llvm_unreachable("Invalid vector type.");
1158 }
1159 
1161  if (VT.getVectorElementType() == MVT::i1)
1162  return RISCV::VRRegClassID;
1163  return getRegClassIDForLMUL(getLMUL(VT));
1164 }
1165 
1166 // Attempt to decompose a subvector insert/extract between VecVT and
1167 // SubVecVT via subregister indices. Returns the subregister index that
1168 // can perform the subvector insert/extract with the given element index, as
1169 // well as the index corresponding to any leftover subvectors that must be
1170 // further inserted/extracted within the register class for SubVecVT.
1171 std::pair<unsigned, unsigned>
1173  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1174  const RISCVRegisterInfo *TRI) {
1175  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1176  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1177  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1178  "Register classes not ordered");
1179  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1180  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1181  // Try to compose a subregister index that takes us from the incoming
1182  // LMUL>1 register class down to the outgoing one. At each step we half
1183  // the LMUL:
1184  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1185  // Note that this is not guaranteed to find a subregister index, such as
1186  // when we are extracting from one VR type to another.
1187  unsigned SubRegIdx = RISCV::NoSubRegister;
1188  for (const unsigned RCID :
1189  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1190  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1191  VecVT = VecVT.getHalfNumVectorElementsVT();
1192  bool IsHi =
1193  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1194  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1195  getSubregIndexByMVT(VecVT, IsHi));
1196  if (IsHi)
1197  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1198  }
1199  return {SubRegIdx, InsertExtractIdx};
1200 }
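// Worked example: extracting nxv2i32 at element index 12 from nxv16i32
// (an LMUL=8 value) halves the type three times (VRM4, VRM2, VR steps),
// composing sub_vrm4_1, sub_vrm2_1 and sub_vrm1_0, and returns a leftover
// element index of 0, matching the nxv16i32@12 example mentioned above.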
1201 
1202 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1203 // stores for those types.
1204 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1205  return !Subtarget.useRVVForFixedLengthVectors() ||
1207 }
1208 
1210  const RISCVSubtarget &Subtarget) {
1211  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1212  if (!Subtarget.useRVVForFixedLengthVectors())
1213  return false;
1214 
1215  // We only support a set of vector types with a consistent maximum fixed size
1216  // across all supported vector element types to avoid legalization issues.
1217  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1218  // fixed-length vector type we support is 1024 bytes.
1219  if (VT.getFixedSizeInBits() > 1024 * 8)
1220  return false;
1221 
1222  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1223 
1224  // Don't use RVV for vectors we cannot scalarize if required.
1225  switch (VT.getVectorElementType().SimpleTy) {
1226  // i1 is supported but has different rules.
1227  default:
1228  return false;
1229  case MVT::i1:
1230  // Masks can only use a single register.
1231  if (VT.getVectorNumElements() > MinVLen)
1232  return false;
1233  MinVLen /= 8;
1234  break;
1235  case MVT::i8:
1236  case MVT::i16:
1237  case MVT::i32:
1238  case MVT::i64:
1239  break;
1240  case MVT::f16:
1241  if (!Subtarget.hasStdExtZfh())
1242  return false;
1243  break;
1244  case MVT::f32:
1245  if (!Subtarget.hasStdExtF())
1246  return false;
1247  break;
1248  case MVT::f64:
1249  if (!Subtarget.hasStdExtD())
1250  return false;
1251  break;
1252  }
1253 
1254  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1255  // Don't use RVV for types that don't fit.
1256  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1257  return false;
1258 
1259  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1260  // the base fixed length RVV support in place.
1261  if (!VT.isPow2VectorType())
1262  return false;
1263 
1264  return true;
1265 }
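// Illustrative outcomes, assuming a subtarget whose minimum RVV vector
// length is 128 bits and whose fixed-length LMUL cap is 8: v8i16 and
// v4i32 (128 bits, LMUL 1) are accepted, v64i32 is rejected because it
// would need LMUL 16, and v3i32 is rejected as a non-power-of-two type.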
1266 
1267 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1269 }
1270 
1271 // Return the largest legal scalable vector type that matches VT's element type.
1273  const RISCVSubtarget &Subtarget) {
1274  // This may be called before legal types are setup.
1275  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1276  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1277  "Expected legal fixed length vector!");
1278 
1279  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1280 
1281  MVT EltVT = VT.getVectorElementType();
1282  switch (EltVT.SimpleTy) {
1283  default:
1284  llvm_unreachable("unexpected element type for RVV container");
1285  case MVT::i1:
1286  case MVT::i8:
1287  case MVT::i16:
1288  case MVT::i32:
1289  case MVT::i64:
1290  case MVT::f16:
1291  case MVT::f32:
1292  case MVT::f64: {
1293  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1294  // narrower types, but we can't have a fractional LMUL with denominator less
1295  // than 64/SEW.
1296  unsigned NumElts =
1298  return MVT::getScalableVectorVT(EltVT, NumElts);
1299  }
1300  }
1301 }
1302 
1304  const RISCVSubtarget &Subtarget) {
1306  Subtarget);
1307 }
1308 
1311 }
1312 
1313 // Grow V to consume an entire RVV register.
1315  const RISCVSubtarget &Subtarget) {
1316  assert(VT.isScalableVector() &&
1317  "Expected to convert into a scalable vector!");
1318  assert(V.getValueType().isFixedLengthVector() &&
1319  "Expected a fixed length vector operand!");
1320  SDLoc DL(V);
1321  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1322  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1323 }
1324 
1325 // Shrink V so it's just big enough to maintain a VT's worth of data.
1327  const RISCVSubtarget &Subtarget) {
1328  assert(VT.isFixedLengthVector() &&
1329  "Expected to convert into a fixed length vector!");
1330  assert(V.getValueType().isScalableVector() &&
1331  "Expected a scalable vector operand!");
1332  SDLoc DL(V);
1333  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1334  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1335 }
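// Together, convertToScalableVector and convertFromScalableVector act as
// the "casts" between a fixed-length VT and its scalable container: the
// former inserts the fixed vector at index 0 of an undef container via
// INSERT_SUBVECTOR, and the latter extracts it back out with
// EXTRACT_SUBVECTOR at index 0.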
1336 
1337 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1338 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1339 // the vector type that it is contained in.
1340 static std::pair<SDValue, SDValue>
1341 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1342  const RISCVSubtarget &Subtarget) {
1343  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1344  MVT XLenVT = Subtarget.getXLenVT();
1345  SDValue VL = VecVT.isFixedLengthVector()
1346  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1347  : DAG.getRegister(RISCV::X0, XLenVT);
1348  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1349  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1350  return {Mask, VL};
1351 }
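// For example, for a fixed-length v4i32 operation this yields VL = 4 (a
// constant AVL in XLenVT) plus an all-ones mask built with VMSET_VL over
// the container's element count; for a scalable type the VL operand is
// instead the X0 register, i.e. "use the maximum vector length".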
1352 
1353 // As above but assuming the given type is a scalable vector type.
1354 static std::pair<SDValue, SDValue>
1356  const RISCVSubtarget &Subtarget) {
1357  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1358  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1359 }
1360 
1361 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very little
1362 // of either is (currently) supported. This can get us into an infinite loop
1363 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1364 // as a ..., etc.
1365 // Until either (or both) of these can reliably lower any node, reporting that
1366 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1367 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1368 // which is not desirable.
1370  EVT VT, unsigned DefinedValues) const {
1371  return false;
1372 }
1373 
1375  // Only splats are currently supported.
1376  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1377  return true;
1378 
1379  return false;
1380 }
1381 
1383  const RISCVSubtarget &Subtarget) {
1384  MVT VT = Op.getSimpleValueType();
1385  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1386 
1387  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1388 
1389  SDLoc DL(Op);
1390  SDValue Mask, VL;
1391  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1392 
1393  unsigned Opc =
1395  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1396  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1397 }
1398 
1399 struct VIDSequence {
1400  int64_t StepNumerator;
1402  int64_t Addend;
1403 };
1404 
1405 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1406 // to the (non-zero) step S and start value X. This can be then lowered as the
1407 // RVV sequence (VID * S) + X, for example.
1408 // The step S is represented as an integer numerator divided by a positive
1409 // denominator. Note that the implementation currently only identifies
1410 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1411 // cannot detect 2/3, for example.
1412 // Note that this method will also match potentially unappealing index
1413 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
1414 // determine whether this is worth generating code for.
1416  unsigned NumElts = Op.getNumOperands();
1417  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1418  if (!Op.getValueType().isInteger())
1419  return None;
1420 
1421  Optional<unsigned> SeqStepDenom;
1422  Optional<int64_t> SeqStepNum, SeqAddend;
1423  Optional<std::pair<uint64_t, unsigned>> PrevElt;
1424  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1425  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1426  // Assume undef elements match the sequence; we just have to be careful
1427  // when interpolating across them.
1428  if (Op.getOperand(Idx).isUndef())
1429  continue;
1430  // The BUILD_VECTOR must be all constants.
1431  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1432  return None;
1433 
1434  uint64_t Val = Op.getConstantOperandVal(Idx) &
1435  maskTrailingOnes<uint64_t>(EltSizeInBits);
1436 
1437  if (PrevElt) {
1438  // Calculate the step since the last non-undef element, and ensure
1439  // it's consistent across the entire sequence.
1440  unsigned IdxDiff = Idx - PrevElt->second;
1441  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1442 
1443  // A zero value difference means that we're somewhere in the middle
1444  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1445  // step change before evaluating the sequence.
1446  if (ValDiff != 0) {
1447  int64_t Remainder = ValDiff % IdxDiff;
1448  // Normalize the step if it's greater than 1.
1449  if (Remainder != ValDiff) {
1450  // The difference must cleanly divide the element span.
1451  if (Remainder != 0)
1452  return None;
1453  ValDiff /= IdxDiff;
1454  IdxDiff = 1;
1455  }
1456 
1457  if (!SeqStepNum)
1458  SeqStepNum = ValDiff;
1459  else if (ValDiff != SeqStepNum)
1460  return None;
1461 
1462  if (!SeqStepDenom)
1463  SeqStepDenom = IdxDiff;
1464  else if (IdxDiff != *SeqStepDenom)
1465  return None;
1466  }
1467  }
1468 
1469  // Record and/or check any addend.
1470  if (SeqStepNum && SeqStepDenom) {
1471  uint64_t ExpectedVal =
1472  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1473  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1474  if (!SeqAddend)
1475  SeqAddend = Addend;
1476  else if (SeqAddend != Addend)
1477  return None;
1478  }
1479 
1480  // Record this non-undef element for later.
1481  if (!PrevElt || PrevElt->first != Val)
1482  PrevElt = std::make_pair(Val, Idx);
1483  }
1484  // We need to have logged both a step and an addend for this to count as
1485  // a legal index sequence.
1486  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1487  return None;
1488 
1489  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1490 }
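// Worked examples of the matching above:
//   <0, 2, 4, 6> yields {StepNumerator=2, StepDenominator=1, Addend=0},
//                i.e. VID * 2.
//   <1, 1, 2, 2> yields {StepNumerator=1, StepDenominator=2, Addend=1},
//                i.e. VID / 2 + 1.
//   <0, 1, 3, 4> is rejected because the step is not consistent.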
1491 
1493  const RISCVSubtarget &Subtarget) {
1494  MVT VT = Op.getSimpleValueType();
1495  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1496 
1497  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1498 
1499  SDLoc DL(Op);
1500  SDValue Mask, VL;
1501  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1502 
1503  MVT XLenVT = Subtarget.getXLenVT();
1504  unsigned NumElts = Op.getNumOperands();
1505 
1506  if (VT.getVectorElementType() == MVT::i1) {
1507  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1508  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1509  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1510  }
1511 
1512  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1513  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1514  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1515  }
1516 
1517  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1518  // scalar integer chunks whose bit-width depends on the number of mask
1519  // bits and XLEN.
1520  // First, determine the most appropriate scalar integer type to use. This
1521  // is at most XLenVT, but may be shrunk to a smaller vector element type
1522  // according to the size of the final vector - use i8 chunks rather than
1523  // XLenVT if we're producing a v8i1. This results in more consistent
1524  // codegen across RV32 and RV64.
1525  unsigned NumViaIntegerBits =
1526  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1527  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1528  // If we have to use more than one INSERT_VECTOR_ELT then this
1529  // optimization is likely to increase code size; avoid peforming it in
1530  // such a case. We can use a load from a constant pool in this case.
1531  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1532  return SDValue();
1533  // Now we can create our integer vector type. Note that it may be larger
1534  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1535  MVT IntegerViaVecVT =
1536  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1537  divideCeil(NumElts, NumViaIntegerBits));
1538 
1539  uint64_t Bits = 0;
1540  unsigned BitPos = 0, IntegerEltIdx = 0;
1541  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1542 
1543  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1544  // Once we accumulate enough bits to fill our scalar type, insert into
1545  // our vector and clear our accumulated data.
1546  if (I != 0 && I % NumViaIntegerBits == 0) {
1547  if (NumViaIntegerBits <= 32)
1548  Bits = SignExtend64(Bits, 32);
1549  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1550  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1551  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1552  Bits = 0;
1553  BitPos = 0;
1554  IntegerEltIdx++;
1555  }
1556  SDValue V = Op.getOperand(I);
1557  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1558  Bits |= ((uint64_t)BitValue << BitPos);
1559  }
1560 
1561  // Insert the (remaining) scalar value into position in our integer
1562  // vector type.
1563  if (NumViaIntegerBits <= 32)
1564  Bits = SignExtend64(Bits, 32);
1565  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1566  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1567  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1568 
1569  if (NumElts < NumViaIntegerBits) {
1570  // If we're producing a smaller vector than our minimum legal integer
1571  // type, bitcast to the equivalent (known-legal) mask type, and extract
1572  // our final mask.
1573  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1574  Vec = DAG.getBitcast(MVT::v8i1, Vec);
1575  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1576  DAG.getConstant(0, DL, XLenVT));
1577  } else {
1578  // Else we must have produced an integer type with the same size as the
1579  // mask type; bitcast for the final result.
1580  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1581  Vec = DAG.getBitcast(VT, Vec);
1582  }
1583 
1584  return Vec;
1585  }
1586 
1587  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1588  // vector type, we have a legal equivalently-sized i8 type, so we can use
1589  // that.
1590  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1591  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1592 
1593  SDValue WideVec;
1594  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1595  // For a splat, perform a scalar truncate before creating the wider
1596  // vector.
1597  assert(Splat.getValueType() == XLenVT &&
1598  "Unexpected type for i1 splat value");
1599  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1600  DAG.getConstant(1, DL, XLenVT));
1601  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1602  } else {
1603  SmallVector<SDValue, 8> Ops(Op->op_values());
1604  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1605  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1606  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1607  }
1608 
1609  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1610  }
1611 
1612  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1613  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1615  Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1616  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1617  }
1618 
1619  // Try and match index sequences, which we can lower to the vid instruction
1620  // with optional modifications. An all-undef vector is matched by
1621  // getSplatValue, above.
1622  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
1623  int64_t StepNumerator = SimpleVID->StepNumerator;
1624  unsigned StepDenominator = SimpleVID->StepDenominator;
1625  int64_t Addend = SimpleVID->Addend;
1626  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
1627  // threshold since it's the immediate value many RVV instructions accept.
1628  if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
1629  isInt<5>(Addend)) {
1630  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1631  // Convert right out of the scalable type so we can use standard ISD
1632  // nodes for the rest of the computation. If we used scalable types with
1633  // these, we'd lose the fixed-length vector info and generate worse
1634  // vsetvli code.
1635  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
1636  assert(StepNumerator != 0 && "Invalid step");
1637  bool Negate = false;
1638  if (StepNumerator != 1) {
1639  int64_t SplatStepVal = StepNumerator;
1640  unsigned Opcode = ISD::MUL;
1641  if (isPowerOf2_64(std::abs(StepNumerator))) {
1642  Negate = StepNumerator < 0;
1643  Opcode = ISD::SHL;
1644  SplatStepVal = Log2_64(std::abs(StepNumerator));
1645  }
1646  SDValue SplatStep = DAG.getSplatVector(
1647  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
1648  VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
1649  }
1650  if (StepDenominator != 1) {
1651  SDValue SplatStep = DAG.getSplatVector(
1652  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
1653  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
1654  }
1655  if (Addend != 0 || Negate) {
1656  SDValue SplatAddend =
1657  DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
1658  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
1659  }
1660  return VID;
1661  }
1662  }
1663 
1664  // Attempt to detect "hidden" splats, which only reveal themselves as splats
1665  // when re-interpreted as a vector with a larger element type. For example,
1666  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1667  // could be instead splat as
1668  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
1669  // TODO: This optimization could also work on non-constant splats, but it
1670  // would require bit-manipulation instructions to construct the splat value.
1671  SmallVector<SDValue> Sequence;
1672  unsigned EltBitSize = VT.getScalarSizeInBits();
1673  const auto *BV = cast<BuildVectorSDNode>(Op);
1674  if (VT.isInteger() && EltBitSize < 64 &&
1676  BV->getRepeatedSequence(Sequence) &&
1677  (Sequence.size() * EltBitSize) <= 64) {
1678  unsigned SeqLen = Sequence.size();
1679  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1680  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1681  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1682  ViaIntVT == MVT::i64) &&
1683  "Unexpected sequence type");
1684 
1685  unsigned EltIdx = 0;
1686  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1687  uint64_t SplatValue = 0;
1688  // Construct the amalgamated value which can be splatted as this larger
1689  // vector type.
1690  for (const auto &SeqV : Sequence) {
1691  if (!SeqV.isUndef())
1692  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1693  << (EltIdx * EltBitSize));
1694  EltIdx++;
1695  }
1696 
1697  // On RV64, sign-extend from 32 to 64 bits where possible in order to
1698  // achieve better constant materialization.
1699  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1700  SplatValue = SignExtend64(SplatValue, 32);
1701 
1702  // Since we can't introduce illegal i64 types at this stage, we can only
1703  // perform an i64 splat on RV32 if it is its own sign-extended value. That
1704  // way we can use RVV instructions to splat.
1705  assert((ViaIntVT.bitsLE(XLenVT) ||
1706  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1707  "Unexpected bitcast sequence");
1708  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1709  SDValue ViaVL =
1710  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1711  MVT ViaContainerVT =
1712  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1713  SDValue Splat =
1714  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1715  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1716  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1717  return DAG.getBitcast(VT, Splat);
1718  }
1719  }
1720 
1721  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1722  // which constitute a large proportion of the elements. In such cases we can
1723  // splat a vector with the dominant element and make up the shortfall with
1724  // INSERT_VECTOR_ELTs.
1725  // Note that this includes vectors of 2 elements by association. The
1726  // upper-most element is the "dominant" one, allowing us to use a splat to
1727  // "insert" the upper element, and an insert of the lower element at position
1728  // 0, which improves codegen.
1729  SDValue DominantValue;
1730  unsigned MostCommonCount = 0;
1731  DenseMap<SDValue, unsigned> ValueCounts;
1732  unsigned NumUndefElts =
1733  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1734 
1735  // Track the number of scalar loads we know we'd be inserting, estimated as
1736  // any non-zero floating-point constant. Other kinds of element are either
1737  // already in registers or are materialized on demand. The threshold at which
1738  // a vector load is more desirable than several scalar materialization and
1739  // vector-insertion instructions is not known.
1740  unsigned NumScalarLoads = 0;
1741 
1742  for (SDValue V : Op->op_values()) {
1743  if (V.isUndef())
1744  continue;
1745 
1746  ValueCounts.insert(std::make_pair(V, 0));
1747  unsigned &Count = ValueCounts[V];
1748 
1749  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
1750  NumScalarLoads += !CFP->isExactlyValue(+0.0);
1751 
1752  // Is this value dominant? In case of a tie, prefer the highest element as
1753  // it's cheaper to insert near the beginning of a vector than it is at the
1754  // end.
1755  if (++Count >= MostCommonCount) {
1756  DominantValue = V;
1757  MostCommonCount = Count;
1758  }
1759  }
1760 
1761  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1762  unsigned NumDefElts = NumElts - NumUndefElts;
1763  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1764 
1765  // Don't perform this optimization when optimizing for size, since
1766  // materializing elements and inserting them tends to cause code bloat.
1767  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
1768  ((MostCommonCount > DominantValueCountThreshold) ||
1769  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1770  // Start by splatting the most common element.
1771  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1772 
1773  DenseSet<SDValue> Processed{DominantValue};
1774  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1775  for (const auto &OpIdx : enumerate(Op->ops())) {
1776  const SDValue &V = OpIdx.value();
1777  if (V.isUndef() || !Processed.insert(V).second)
1778  continue;
1779  if (ValueCounts[V] == 1) {
1780  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1781  DAG.getConstant(OpIdx.index(), DL, XLenVT));
1782  } else {
1783  // Blend in all instances of this value using a VSELECT, using a
1784  // mask where each bit signals whether that element is the one
1785  // we're after.
1786  SmallVector<SDValue> Ops;
1787  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1788  return DAG.getConstant(V == V1, DL, XLenVT);
1789  });
1790  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1791  DAG.getBuildVector(SelMaskTy, DL, Ops),
1792  DAG.getSplatBuildVector(VT, DL, V), Vec);
1793  }
1794  }
1795 
1796  return Vec;
1797  }
1798 
1799  return SDValue();
1800 }
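// As an example of the dominant-value path above: a non-constant v4i32
// build_vector (A, A, A, B) is lowered as a splat of A followed by a
// single INSERT_VECTOR_ELT of B at index 3, rather than four scalar
// insertions.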
1801 
1803  SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1804  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1805  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1806  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1807  // If Hi constant is all the same sign bit as Lo, lower this as a custom
1808  // node in order to try and match RVV vector/scalar instructions.
1809  if ((LoC >> 31) == HiC)
1810  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1811  }
1812 
1813  // Fall back to a stack store and stride x0 vector load.
1814  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
1815 }
1816 
1817 // Called by type legalization to handle splat of i64 on RV32.
1818 // FIXME: We can optimize this when the type has sign or zero bits in one
1819 // of the halves.
1820 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1821  SDValue VL, SelectionDAG &DAG) {
1822  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
1823  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1824  DAG.getConstant(0, DL, MVT::i32));
1825  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1826  DAG.getConstant(1, DL, MVT::i32));
1827  return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
1828 }
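// For example, splatting the i64 value 0x0000000100000002 on RV32 splits
// into Lo = 2 and Hi = 1; since Hi does not equal the sign bits of Lo, the
// splat goes through SPLAT_VECTOR_SPLIT_I64_VL (stack store plus
// stride-x0 load), whereas splatting -1 (where Hi matches Lo's sign bits)
// folds to a single VMV_V_X_VL of Lo.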
1829 
1830 // This function lowers a splat of a scalar operand Scalar with the vector
1831 // length VL. It ensures the final sequence is type legal, which is useful when
1832 // lowering a splat after type legalization.
1834  SelectionDAG &DAG,
1835  const RISCVSubtarget &Subtarget) {
1836  if (VT.isFloatingPoint())
1837  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
1838 
1839  MVT XLenVT = Subtarget.getXLenVT();
1840 
1841  // Simplest case is that the operand needs to be promoted to XLenVT.
1842  if (Scalar.getValueType().bitsLE(XLenVT)) {
1843  // If the operand is a constant, sign extend to increase our chances
1844  // of being able to use a .vi instruction. ANY_EXTEND would become a
1845  // zero extend and the simm5 check in isel would fail.
1846  // FIXME: Should we ignore the upper bits in isel instead?
1847  unsigned ExtOpc =
1848  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
1849  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
1850  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
1851  }
1852 
1853  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
1854  "Unexpected scalar for splat lowering!");
1855 
1856  // Otherwise use the more complicated splatting algorithm.
1857  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
1858 }
1859 
1861  const RISCVSubtarget &Subtarget) {
1862  SDValue V1 = Op.getOperand(0);
1863  SDValue V2 = Op.getOperand(1);
1864  SDLoc DL(Op);
1865  MVT XLenVT = Subtarget.getXLenVT();
1866  MVT VT = Op.getSimpleValueType();
1867  unsigned NumElts = VT.getVectorNumElements();
1868  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1869 
1870  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1871 
1872  SDValue TrueMask, VL;
1873  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1874 
1875  if (SVN->isSplat()) {
1876  const int Lane = SVN->getSplatIndex();
1877  if (Lane >= 0) {
1878  MVT SVT = VT.getVectorElementType();
1879 
1880  // Turn splatted vector load into a strided load with an X0 stride.
1881  SDValue V = V1;
1882  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
1883  // with undef.
1884  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
1885  int Offset = Lane;
1886  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
1887  int OpElements =
1889  V = V.getOperand(Offset / OpElements);
1890  Offset %= OpElements;
1891  }
1892 
1893  // We need to ensure the load isn't atomic or volatile.
1894  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
1895  auto *Ld = cast<LoadSDNode>(V);
1896  Offset *= SVT.getStoreSize();
1897  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
1899 
1900  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
1901  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
1902  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
1903  SDValue IntID =
1904  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
1905  SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
1906  DAG.getRegister(RISCV::X0, XLenVT), VL};
1907  SDValue NewLoad = DAG.getMemIntrinsicNode(
1908  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
1910  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
1911  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
1912  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
1913  }
1914 
1915  // Otherwise use a scalar load and splat. This will give the best
1916  // opportunity to fold a splat into the operation. ISel can turn it into
1917  // the x0 strided load if we aren't able to fold away the select.
1918  if (SVT.isFloatingPoint())
1919  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
1920  Ld->getPointerInfo().getWithOffset(Offset),
1921  Ld->getOriginalAlign(),
1922  Ld->getMemOperand()->getFlags());
1923  else
1924  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
1925  Ld->getPointerInfo().getWithOffset(Offset), SVT,
1926  Ld->getOriginalAlign(),
1927  Ld->getMemOperand()->getFlags());
1928  DAG.makeEquivalentMemoryOrdering(Ld, V);
1929 
1930  unsigned Opc =
1932  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
1933  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1934  }
1935 
1936  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
1937  assert(Lane < (int)NumElts && "Unexpected lane!");
1938  SDValue Gather =
1939  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
1940  DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
1941  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1942  }
1943  }
1944 
1945  // Detect shuffles which can be re-expressed as vector selects; these are
1946  // shuffles in which each element in the destination is taken from an element
1947  // at the corresponding index in either source vectors.
1948  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
1949  int MaskIndex = MaskIdx.value();
1950  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
1951  });
1952 
1953  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
1954 
1955  SmallVector<SDValue> MaskVals;
1956  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
1957  // merged with a second vrgather.
1958  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
1959 
1960  // By default we preserve the original operand order, and use a mask to
1961  // select LHS as true and RHS as false. However, since RVV vector selects may
1962  // feature splats but only on the LHS, we may choose to invert our mask and
1963  // instead select between RHS and LHS.
1964  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
1965  bool InvertMask = IsSelect == SwapOps;
1966 
1967  // Now construct the mask that will be used by the vselect or blended
1968  // vrgather operation. For vrgathers, construct the appropriate indices into
1969  // each vector.
1970  for (int MaskIndex : SVN->getMask()) {
1971  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
1972  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
1973  if (!IsSelect) {
1974  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
1975  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
1976  ? DAG.getConstant(MaskIndex, DL, XLenVT)
1977  : DAG.getUNDEF(XLenVT));
1978  GatherIndicesRHS.push_back(
1979  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
1980  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
1981  }
1982  }
1983 
1984  if (SwapOps) {
1985  std::swap(V1, V2);
1986  std::swap(GatherIndicesLHS, GatherIndicesRHS);
1987  }
1988 
1989  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
1990  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
1991  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
1992 
1993  if (IsSelect)
1994  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
1995 
1996  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
1997  // On such a large vector we're unable to use i8 as the index type.
1998  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
1999  // may involve vector splitting if we're already at LMUL=8, or our
2000  // user-supplied maximum fixed-length LMUL.
2001  return SDValue();
2002  }
2003 
2004  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
2005  MVT IndexVT = VT.changeTypeToInteger();
2006  // Since we can't introduce illegal index types at this stage, use i16 and
2007  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2008  // than XLenVT.
2009  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2010  GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
2011  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2012  }
2013 
2014  MVT IndexContainerVT =
2015  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2016 
2017  SDValue Gather;
2018  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2019  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2020  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2021  Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2022  } else {
2023  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2024  LHSIndices =
2025  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2026 
2027  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2028  Gather =
2029  DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
2030  }
2031 
2032  // If a second vector operand is used by this shuffle, blend it in with an
2033  // additional vrgather.
2034  if (!V2.isUndef()) {
2035  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2036  SelectMask =
2037  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2038 
2039  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2040  RHSIndices =
2041  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2042 
2043  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2044  V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
2045  Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2046  Gather, VL);
2047  }
2048 
2049  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2050 }
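// For example, a single-source reverse shuffle such as
//   (vector_shuffle<7,6,5,4,3,2,1,0> v8i16:%a, undef)
// becomes one VRGATHER_VV_VL of %a with index vector <7,6,...,0>, while a
// two-source interleave mask such as <0,8,1,9,2,10,3,11> produces a vrgather
// of each operand and a VSELECT_VL blending the two results under the select
// mask built above.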
2051 
2052 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2053  SDLoc DL, SelectionDAG &DAG,
2054  const RISCVSubtarget &Subtarget) {
2055  if (VT.isScalableVector())
2056  return DAG.getFPExtendOrRound(Op, DL, VT);
2057  assert(VT.isFixedLengthVector() &&
2058  "Unexpected value type for RVV FP extend/round lowering");
2059  SDValue Mask, VL;
2060  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2061  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2062  ? RISCVISD::FP_EXTEND_VL
2063  : RISCVISD::FP_ROUND_VL;
2064  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2065 }
2066 
2067 // While RVV has alignment restrictions, we should always be able to load as a
2068 // legal equivalently-sized byte-typed vector instead. This method is
2069 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2070 // the load is already correctly-aligned, it returns SDValue().
2071 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2072  SelectionDAG &DAG) const {
2073  auto *Load = cast<LoadSDNode>(Op);
2074  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2075 
2076  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2077  Load->getMemoryVT(),
2078  *Load->getMemOperand()))
2079  return SDValue();
2080 
2081  SDLoc DL(Op);
2082  MVT VT = Op.getSimpleValueType();
2083  unsigned EltSizeBits = VT.getScalarSizeInBits();
2084  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2085  "Unexpected unaligned RVV load type");
2086  MVT NewVT =
2087  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2088  assert(NewVT.isValid() &&
2089  "Expecting equally-sized RVV vector types to be legal");
2090  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2091  Load->getPointerInfo(), Load->getOriginalAlign(),
2092  Load->getMemOperand()->getFlags());
2093  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2094 }
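// For example, a 1-byte-aligned load of <vscale x 1 x i64> is re-expressed
// here as a <vscale x 8 x i8> load of the same size in bytes followed by a
// bitcast back to the original type, since byte-element accesses carry no
// additional alignment requirement; the chain result is forwarded via
// getMergeValues.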
2095 
2096 // While RVV has alignment restrictions, we should always be able to store as a
2097 // legal equivalently-sized byte-typed vector instead. This method is
2098 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
2099 // returns SDValue() if the store is already correctly aligned.
2100 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2101  SelectionDAG &DAG) const {
2102  auto *Store = cast<StoreSDNode>(Op);
2103  assert(Store && Store->getValue().getValueType().isVector() &&
2104  "Expected vector store");
2105 
2106  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2107  Store->getMemoryVT(),
2108  *Store->getMemOperand()))
2109  return SDValue();
2110 
2111  SDLoc DL(Op);
2112  SDValue StoredVal = Store->getValue();
2113  MVT VT = StoredVal.getSimpleValueType();
2114  unsigned EltSizeBits = VT.getScalarSizeInBits();
2115  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2116  "Unexpected unaligned RVV store type");
2117  MVT NewVT =
2118  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2119  assert(NewVT.isValid() &&
2120  "Expecting equally-sized RVV vector types to be legal");
2121  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2122  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2123  Store->getPointerInfo(), Store->getOriginalAlign(),
2124  Store->getMemOperand()->getFlags());
2125 }
2126 
2127 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2128  SelectionDAG &DAG) const {
2129  switch (Op.getOpcode()) {
2130  default:
2131  report_fatal_error("unimplemented operand");
2132  case ISD::GlobalAddress:
2133  return lowerGlobalAddress(Op, DAG);
2134  case ISD::BlockAddress:
2135  return lowerBlockAddress(Op, DAG);
2136  case ISD::ConstantPool:
2137  return lowerConstantPool(Op, DAG);
2138  case ISD::JumpTable:
2139  return lowerJumpTable(Op, DAG);
2140  case ISD::GlobalTLSAddress:
2141  return lowerGlobalTLSAddress(Op, DAG);
2142  case ISD::SELECT:
2143  return lowerSELECT(Op, DAG);
2144  case ISD::BRCOND:
2145  return lowerBRCOND(Op, DAG);
2146  case ISD::VASTART:
2147  return lowerVASTART(Op, DAG);
2148  case ISD::FRAMEADDR:
2149  return lowerFRAMEADDR(Op, DAG);
2150  case ISD::RETURNADDR:
2151  return lowerRETURNADDR(Op, DAG);
2152  case ISD::SHL_PARTS:
2153  return lowerShiftLeftParts(Op, DAG);
2154  case ISD::SRA_PARTS:
2155  return lowerShiftRightParts(Op, DAG, true);
2156  case ISD::SRL_PARTS:
2157  return lowerShiftRightParts(Op, DAG, false);
2158  case ISD::BITCAST: {
2159  SDLoc DL(Op);
2160  EVT VT = Op.getValueType();
2161  SDValue Op0 = Op.getOperand(0);
2162  EVT Op0VT = Op0.getValueType();
2163  MVT XLenVT = Subtarget.getXLenVT();
2164  if (VT.isFixedLengthVector()) {
2165  // We can handle fixed length vector bitcasts with a simple replacement
2166  // in isel.
2167  if (Op0VT.isFixedLengthVector())
2168  return Op;
2169  // When bitcasting from scalar to fixed-length vector, insert the scalar
2170  // into a one-element vector of the result type, and perform a vector
2171  // bitcast.
2172  if (!Op0VT.isVector()) {
2173  auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2174  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2175  DAG.getUNDEF(BVT), Op0,
2176  DAG.getConstant(0, DL, XLenVT)));
2177  }
2178  return SDValue();
2179  }
2180  // Custom-legalize bitcasts from fixed-length vector types to scalar types
2181  // thus: bitcast the vector to a one-element vector type whose element type
2182  // is the same as the result type, and extract the first element.
2183  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2184  LLVMContext &Context = *DAG.getContext();
2185  SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
2186  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2187  DAG.getConstant(0, DL, XLenVT));
2188  }
2189  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2190  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2191  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2192  return FPConv;
2193  }
2194  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2195  Subtarget.hasStdExtF()) {
2196  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2197  SDValue FPConv =
2198  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2199  return FPConv;
2200  }
2201  return SDValue();
2202  }
2203  case ISD::INTRINSIC_WO_CHAIN:
2204  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2205  case ISD::INTRINSIC_W_CHAIN:
2206  return LowerINTRINSIC_W_CHAIN(Op, DAG);
2207  case ISD::BSWAP:
2208  case ISD::BITREVERSE: {
2209  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2210  assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2211  MVT VT = Op.getSimpleValueType();
2212  SDLoc DL(Op);
2213  // Start with the maximum immediate value which is the bitwidth - 1.
2214  unsigned Imm = VT.getSizeInBits() - 1;
2215  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2216  if (Op.getOpcode() == ISD::BSWAP)
2217  Imm &= ~0x7U;
2218  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2219  DAG.getConstant(Imm, DL, VT));
2220  }
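// For example, on RV64 an i64 BITREVERSE becomes (GREV x, 63), while an i64
// BSWAP clears the low three immediate bits and becomes (GREV x, 56).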
2221  case ISD::FSHL:
2222  case ISD::FSHR: {
2223  MVT VT = Op.getSimpleValueType();
2224  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2225  SDLoc DL(Op);
2226  if (Op.getOperand(2).getOpcode() == ISD::Constant)
2227  return Op;
2228  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2229  // use log2(XLen) bits. Mask the shift amount accordingly.
2230  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2231  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2232  DAG.getConstant(ShAmtWidth, DL, VT));
2233  unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
2234  return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
2235  }
2236  case ISD::TRUNCATE: {
2237  SDLoc DL(Op);
2238  MVT VT = Op.getSimpleValueType();
2239  // Only custom-lower vector truncates
2240  if (!VT.isVector())
2241  return Op;
2242 
2243  // Truncates to mask types are handled differently
2244  if (VT.getVectorElementType() == MVT::i1)
2245  return lowerVectorMaskTrunc(Op, DAG);
2246 
2247  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2248  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2249  // truncate by one power of two at a time.
2250  MVT DstEltVT = VT.getVectorElementType();
2251 
2252  SDValue Src = Op.getOperand(0);
2253  MVT SrcVT = Src.getSimpleValueType();
2254  MVT SrcEltVT = SrcVT.getVectorElementType();
2255 
2256  assert(DstEltVT.bitsLT(SrcEltVT) &&
2257  isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2258  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2259  "Unexpected vector truncate lowering");
2260 
2261  MVT ContainerVT = SrcVT;
2262  if (SrcVT.isFixedLengthVector()) {
2263  ContainerVT = getContainerForFixedLengthVector(SrcVT);
2264  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2265  }
2266 
2267  SDValue Result = Src;
2268  SDValue Mask, VL;
2269  std::tie(Mask, VL) =
2270  getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2271  LLVMContext &Context = *DAG.getContext();
2272  const ElementCount Count = ContainerVT.getVectorElementCount();
2273  do {
2274  SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2275  EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2276  Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2277  Mask, VL);
2278  } while (SrcEltVT != DstEltVT);
2279 
2280  if (SrcVT.isFixedLengthVector())
2281  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2282 
2283  return Result;
2284  }
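// For example, a v8i64 -> v8i8 truncate is emitted as three
// TRUNCATE_VECTOR_VL steps (i64->i32, i32->i16, i16->i8), each halving the
// element width.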
2285  case ISD::ANY_EXTEND:
2286  case ISD::ZERO_EXTEND:
2287  if (Op.getOperand(0).getValueType().isVector() &&
2288  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2289  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2290  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2291  case ISD::SIGN_EXTEND:
2292  if (Op.getOperand(0).getValueType().isVector() &&
2293  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2294  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2295  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2296  case ISD::SPLAT_VECTOR_PARTS:
2297  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2298  case ISD::INSERT_VECTOR_ELT:
2299  return lowerINSERT_VECTOR_ELT(Op, DAG);
2300  case ISD::EXTRACT_VECTOR_ELT:
2301  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2302  case ISD::VSCALE: {
2303  MVT VT = Op.getSimpleValueType();
2304  SDLoc DL(Op);
2305  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
2306  // We define our scalable vector types for lmul=1 to use a 64 bit known
2307  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
2308  // vscale as VLENB / 8.
2309  assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2310  if (isa<ConstantSDNode>(Op.getOperand(0))) {
2311  // We assume VLENB is a multiple of 8. We manually choose the best shift
2312  // here because SimplifyDemandedBits isn't always able to simplify it.
2313  uint64_t Val = Op.getConstantOperandVal(0);
2314  if (isPowerOf2_64(Val)) {
2315  uint64_t Log2 = Log2_64(Val);
2316  if (Log2 < 3)
2317  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
2318  DAG.getConstant(3 - Log2, DL, VT));
2319  if (Log2 > 3)
2320  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
2321  DAG.getConstant(Log2 - 3, DL, VT));
2322  return VLENB;
2323  }
2324  // If the multiplier is a multiple of 8, scale it down to avoid needing
2325  // to shift the VLENB value.
2326  if ((Val % 8) == 0)
2327  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
2328  DAG.getConstant(Val / 8, DL, VT));
2329  }
2330 
2331  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2332  DAG.getConstant(3, DL, VT));
2333  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2334  }
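// For example, with RVVBitsPerBlock == 64: vscale*2 lowers to (srl VLENB, 2),
// vscale*8 is VLENB itself, vscale*16 becomes (shl VLENB, 1), and vscale*24
// becomes (mul VLENB, 3).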
2335  case ISD::FP_EXTEND: {
2336  // RVV can only do fp_extend to types double the size of the source. We
2337  // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2338  // via f32.
2339  SDLoc DL(Op);
2340  MVT VT = Op.getSimpleValueType();
2341  SDValue Src = Op.getOperand(0);
2342  MVT SrcVT = Src.getSimpleValueType();
2343 
2344  // Prepare any fixed-length vector operands.
2345  MVT ContainerVT = VT;
2346  if (SrcVT.isFixedLengthVector()) {
2347  ContainerVT = getContainerForFixedLengthVector(VT);
2348  MVT SrcContainerVT =
2349  ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2350  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2351  }
2352 
2353  if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2354  SrcVT.getVectorElementType() != MVT::f16) {
2355  // For scalable vectors, we only need to close the gap between
2356  // vXf16->vXf64.
2357  if (!VT.isFixedLengthVector())
2358  return Op;
2359  // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2360  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2361  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2362  }
2363 
2364  MVT InterVT = VT.changeVectorElementType(MVT::f32);
2365  MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2366  SDValue IntermediateExtend = getRVVFPExtendOrRound(
2367  Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2368 
2369  SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2370  DL, DAG, Subtarget);
2371  if (VT.isFixedLengthVector())
2372  return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2373  return Extend;
2374  }
2375  case ISD::FP_ROUND: {
2376  // RVV can only do fp_round to types half the size of the source. We
2377  // custom-lower f64->f16 rounds via RVV's round-to-odd float
2378  // conversion instruction.
2379  SDLoc DL(Op);
2380  MVT VT = Op.getSimpleValueType();
2381  SDValue Src = Op.getOperand(0);
2382  MVT SrcVT = Src.getSimpleValueType();
2383 
2384  // Prepare any fixed-length vector operands.
2385  MVT ContainerVT = VT;
2386  if (VT.isFixedLengthVector()) {
2387  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2388  ContainerVT =
2389  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2390  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2391  }
2392 
2393  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2394  SrcVT.getVectorElementType() != MVT::f64) {
2395  // For scalable vectors, we only need to close the gap between
2396  // vXf64<->vXf16.
2397  if (!VT.isFixedLengthVector())
2398  return Op;
2399  // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2400  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2401  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2402  }
2403 
2404  SDValue Mask, VL;
2405  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2406 
2407  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2408  SDValue IntermediateRound =
2409  DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2410  SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2411  DL, DAG, Subtarget);
2412 
2413  if (VT.isFixedLengthVector())
2414  return convertFromScalableVector(VT, Round, DAG, Subtarget);
2415  return Round;
2416  }
2417  case ISD::FP_TO_SINT:
2418  case ISD::FP_TO_UINT:
2419  case ISD::SINT_TO_FP:
2420  case ISD::UINT_TO_FP: {
2421  // RVV can only do fp<->int conversions to types half/double the size of
2422  // the source. We custom-lower any conversions that do two hops into
2423  // sequences.
2424  MVT VT = Op.getSimpleValueType();
2425  if (!VT.isVector())
2426  return Op;
2427  SDLoc DL(Op);
2428  SDValue Src = Op.getOperand(0);
2429  MVT EltVT = VT.getVectorElementType();
2430  MVT SrcVT = Src.getSimpleValueType();
2431  MVT SrcEltVT = SrcVT.getVectorElementType();
2432  unsigned EltSize = EltVT.getSizeInBits();
2433  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2434  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2435  "Unexpected vector element types");
2436 
2437  bool IsInt2FP = SrcEltVT.isInteger();
2438  // Widening conversions
2439  if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2440  if (IsInt2FP) {
2441  // Do a regular integer sign/zero extension then convert to float.
2442  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
2443  VT.getVectorElementCount());
2444  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2445  ? ISD::ZERO_EXTEND
2446  : ISD::SIGN_EXTEND;
2447  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2448  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2449  }
2450  // FP2Int
2451  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2452  // Do one doubling fp_extend then complete the operation by converting
2453  // to int.
2454  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2455  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2456  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2457  }
2458 
2459  // Narrowing conversions
2460  if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2461  if (IsInt2FP) {
2462  // One narrowing int_to_fp, then an fp_round.
2463  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2464  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2465  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2466  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2467  }
2468  // FP2Int
2469  // One narrowing fp_to_int, then truncate the integer. If the float isn't
2470  // representable by the integer, the result is poison.
2471  MVT IVecVT =
2472  MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
2473  VT.getVectorElementCount());
2474  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2475  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2476  }
2477 
2478  // Scalable vectors can exit here. Patterns will handle equally-sized
2479  // conversions halving/doubling ones.
2480  if (!VT.isFixedLengthVector())
2481  return Op;
2482 
2483  // For fixed-length vectors we lower to a custom "VL" node.
2484  unsigned RVVOpc = 0;
2485  switch (Op.getOpcode()) {
2486  default:
2487  llvm_unreachable("Impossible opcode");
2488  case ISD::FP_TO_SINT:
2489  RVVOpc = RISCVISD::FP_TO_SINT_VL;
2490  break;
2491  case ISD::FP_TO_UINT:
2492  RVVOpc = RISCVISD::FP_TO_UINT_VL;
2493  break;
2494  case ISD::SINT_TO_FP:
2495  RVVOpc = RISCVISD::SINT_TO_FP_VL;
2496  break;
2497  case ISD::UINT_TO_FP:
2498  RVVOpc = RISCVISD::UINT_TO_FP_VL;
2499  break;
2500  }
2501 
2502  MVT ContainerVT, SrcContainerVT;
2503  // Derive the reference container type from the larger vector type.
2504  if (SrcEltSize > EltSize) {
2505  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2506  ContainerVT =
2507  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2508  } else {
2509  ContainerVT = getContainerForFixedLengthVector(VT);
2510  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2511  }
2512 
2513  SDValue Mask, VL;
2514  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2515 
2516  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2517  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2518  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2519  }
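// For example, a fixed-length v4f64 -> v4i8 FP_TO_SINT is split here into a
// narrowing f64 -> i32 conversion followed by an i32 -> i8 TRUNCATE (itself
// custom-lowered above); conversions whose element sizes differ by at most a
// factor of two fall through to the *_VL nodes or plain isel patterns.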
2520  case ISD::VECREDUCE_ADD:
2521  case ISD::VECREDUCE_UMAX:
2522  case ISD::VECREDUCE_SMAX:
2523  case ISD::VECREDUCE_UMIN:
2524  case ISD::VECREDUCE_SMIN:
2525  return lowerVECREDUCE(Op, DAG);
2526  case ISD::VECREDUCE_AND:
2527  case ISD::VECREDUCE_OR:
2528  case ISD::VECREDUCE_XOR:
2529  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2530  return lowerVectorMaskVECREDUCE(Op, DAG);
2531  return lowerVECREDUCE(Op, DAG);
2532  case ISD::VECREDUCE_FADD:
2533  case ISD::VECREDUCE_SEQ_FADD:
2534  case ISD::VECREDUCE_FMIN:
2535  case ISD::VECREDUCE_FMAX:
2536  return lowerFPVECREDUCE(Op, DAG);
2537  case ISD::INSERT_SUBVECTOR:
2538  return lowerINSERT_SUBVECTOR(Op, DAG);
2539  case ISD::EXTRACT_SUBVECTOR:
2540  return lowerEXTRACT_SUBVECTOR(Op, DAG);
2541  case ISD::STEP_VECTOR:
2542  return lowerSTEP_VECTOR(Op, DAG);
2543  case ISD::VECTOR_REVERSE:
2544  return lowerVECTOR_REVERSE(Op, DAG);
2545  case ISD::BUILD_VECTOR:
2546  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2547  case ISD::SPLAT_VECTOR:
2548  if (Op.getValueType().getVectorElementType() == MVT::i1)
2549  return lowerVectorMaskSplat(Op, DAG);
2550  return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2551  case ISD::VECTOR_SHUFFLE:
2552  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2553  case ISD::CONCAT_VECTORS: {
2554  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2555  // better than going through the stack, as the default expansion does.
2556  SDLoc DL(Op);
2557  MVT VT = Op.getSimpleValueType();
2558  unsigned NumOpElts =
2559  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
2560  SDValue Vec = DAG.getUNDEF(VT);
2561  for (const auto &OpIdx : enumerate(Op->ops()))
2562  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2563  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2564  return Vec;
2565  }
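// For example, (concat_vectors v4i32:%a, v4i32:%b) becomes
//   (insert_subvector (insert_subvector undef:v8i32, %a, 0), %b, 4)
// rather than a store-and-reload through the stack.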
2566  case ISD::LOAD:
2567  if (auto V = expandUnalignedRVVLoad(Op, DAG))
2568  return V;
2569  if (Op.getValueType().isFixedLengthVector())
2570  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2571  return Op;
2572  case ISD::STORE:
2573  if (auto V = expandUnalignedRVVStore(Op, DAG))
2574  return V;
2575  if (Op.getOperand(1).getValueType().isFixedLengthVector())
2576  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2577  return Op;
2578  case ISD::MLOAD:
2579  return lowerMLOAD(Op, DAG);
2580  case ISD::MSTORE:
2581  return lowerMSTORE(Op, DAG);
2582  case ISD::SETCC:
2583  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2584  case ISD::ADD:
2585  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2586  case ISD::SUB:
2587  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2588  case ISD::MUL:
2589  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2590  case ISD::MULHS:
2591  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2592  case ISD::MULHU:
2593  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2594  case ISD::AND:
2595  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2596  RISCVISD::AND_VL);
2597  case ISD::OR:
2598  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2599  RISCVISD::OR_VL);
2600  case ISD::XOR:
2601  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2602  RISCVISD::XOR_VL);
2603  case ISD::SDIV:
2604  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2605  case ISD::SREM:
2606  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2607  case ISD::UDIV:
2608  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2609  case ISD::UREM:
2610  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2611  case ISD::SHL:
2612  case ISD::SRA:
2613  case ISD::SRL:
2614  if (Op.getSimpleValueType().isFixedLengthVector())
2615  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
2616  // This can be called for an i32 shift amount that needs to be promoted.
2617  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
2618  "Unexpected custom legalisation");
2619  return SDValue();
2620  case ISD::SADDSAT:
2621  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
2622  case ISD::UADDSAT:
2623  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
2624  case ISD::SSUBSAT:
2625  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
2626  case ISD::USUBSAT:
2627  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
2628  case ISD::FADD:
2629  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2630  case ISD::FSUB:
2631  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2632  case ISD::FMUL:
2633  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2634  case ISD::FDIV:
2635  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2636  case ISD::FNEG:
2637  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2638  case ISD::FABS:
2639  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2640  case ISD::FSQRT:
2641  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2642  case ISD::FMA:
2643  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2644  case ISD::SMIN:
2645  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2646  case ISD::SMAX:
2647  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2648  case ISD::UMIN:
2649  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2650  case ISD::UMAX:
2651  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2652  case ISD::FMINNUM:
2653  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2654  case ISD::FMAXNUM:
2655  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2656  case ISD::ABS:
2657  return lowerABS(Op, DAG);
2658  case ISD::VSELECT:
2659  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2660  case ISD::FCOPYSIGN:
2661  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2662  case ISD::MGATHER:
2663  return lowerMGATHER(Op, DAG);
2664  case ISD::MSCATTER:
2665  return lowerMSCATTER(Op, DAG);
2666  case ISD::FLT_ROUNDS_:
2667  return lowerGET_ROUNDING(Op, DAG);
2668  case ISD::SET_ROUNDING:
2669  return lowerSET_ROUNDING(Op, DAG);
2670  case ISD::VP_ADD:
2671  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2672  case ISD::VP_SUB:
2673  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2674  case ISD::VP_MUL:
2675  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2676  case ISD::VP_SDIV:
2677  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2678  case ISD::VP_UDIV:
2679  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2680  case ISD::VP_SREM:
2681  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2682  case ISD::VP_UREM:
2683  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2684  case ISD::VP_AND:
2685  return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2686  case ISD::VP_OR:
2687  return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2688  case ISD::VP_XOR:
2689  return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2690  case ISD::VP_ASHR:
2691  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2692  case ISD::VP_LSHR:
2693  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2694  case ISD::VP_SHL:
2695  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2696  case ISD::VP_FADD:
2697  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
2698  case ISD::VP_FSUB:
2699  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
2700  case ISD::VP_FMUL:
2701  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
2702  case ISD::VP_FDIV:
2703  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
2704  }
2705 }
2706 
2707 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2708  SelectionDAG &DAG, unsigned Flags) {
2709  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2710 }
2711 
2712 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2713  SelectionDAG &DAG, unsigned Flags) {
2714  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2715  Flags);
2716 }
2717 
2718 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2719  SelectionDAG &DAG, unsigned Flags) {
2720  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2721  N->getOffset(), Flags);
2722 }
2723 
2724 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2725  SelectionDAG &DAG, unsigned Flags) {
2726  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2727 }
2728 
2729 template <class NodeTy>
2730 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2731  bool IsLocal) const {
2732  SDLoc DL(N);
2733  EVT Ty = getPointerTy(DAG.getDataLayout());
2734 
2735  if (isPositionIndependent()) {
2736  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2737  if (IsLocal)
2738  // Use PC-relative addressing to access the symbol. This generates the
2739  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2740  // %pcrel_lo(auipc)).
2741  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2742 
2743  // Use PC-relative addressing to access the GOT for this symbol, then load
2744  // the address from the GOT. This generates the pattern (PseudoLA sym),
2745  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2746  return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2747  }
2748 
2749  switch (getTargetMachine().getCodeModel()) {
2750  default:
2751  report_fatal_error("Unsupported code model for lowering");
2752  case CodeModel::Small: {
2753  // Generate a sequence for accessing addresses within the first 2 GiB of
2754  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
2755  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2756  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2757  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2758  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2759  }
2760  case CodeModel::Medium: {
2761  // Generate a sequence for accessing addresses within any 2GiB range within
2762  // the address space. This generates the pattern (PseudoLLA sym), which
2763  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
2764  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2765  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2766  }
2767  }
2768 }
2769 
2770 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2771  SelectionDAG &DAG) const {
2772  SDLoc DL(Op);
2773  EVT Ty = Op.getValueType();
2774  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2775  int64_t Offset = N->getOffset();
2776  MVT XLenVT = Subtarget.getXLenVT();
2777 
2778  const GlobalValue *GV = N->getGlobal();
2779  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2780  SDValue Addr = getAddr(N, DAG, IsLocal);
2781 
2782  // In order to maximise the opportunity for common subexpression elimination,
2783  // emit a separate ADD node for the global address offset instead of folding
2784  // it in the global address node. Later peephole optimisations may choose to
2785  // fold it back in when profitable.
2786  if (Offset != 0)
2787  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2788  DAG.getConstant(Offset, DL, XLenVT));
2789  return Addr;
2790 }
2791 
2792 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2793  SelectionDAG &DAG) const {
2794  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2795 
2796  return getAddr(N, DAG);
2797 }
2798 
2799 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2800  SelectionDAG &DAG) const {
2801  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2802 
2803  return getAddr(N, DAG);
2804 }
2805 
2806 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2807  SelectionDAG &DAG) const {
2808  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2809 
2810  return getAddr(N, DAG);
2811 }
2812 
2813 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2814  SelectionDAG &DAG,
2815  bool UseGOT) const {
2816  SDLoc DL(N);
2817  EVT Ty = getPointerTy(DAG.getDataLayout());
2818  const GlobalValue *GV = N->getGlobal();
2819  MVT XLenVT = Subtarget.getXLenVT();
2820 
2821  if (UseGOT) {
2822  // Use PC-relative addressing to access the GOT for this TLS symbol, then
2823  // load the address from the GOT and add the thread pointer. This generates
2824  // the pattern (PseudoLA_TLS_IE sym), which expands to
2825  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
2826  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2827  SDValue Load =
2828  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
2829 
2830  // Add the thread pointer.
2831  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2832  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
2833  }
2834 
2835  // Generate a sequence for accessing the address relative to the thread
2836  // pointer, with the appropriate adjustment for the thread pointer offset.
2837  // This generates the pattern
2838  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
2839  SDValue AddrHi =
2840  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
2841  SDValue AddrAdd =
2842  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
2843  SDValue AddrLo =
2844  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
2845 
2846  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2847  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2848  SDValue MNAdd = SDValue(
2849  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
2850  0);
2851  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
2852 }
2853 
2854 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2855  SelectionDAG &DAG) const {
2856  SDLoc DL(N);
2857  EVT Ty = getPointerTy(DAG.getDataLayout());
2858  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2859  const GlobalValue *GV = N->getGlobal();
2860 
2861  // Use a PC-relative addressing mode to access the global dynamic GOT address.
2862  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
2863  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
2864  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2865  SDValue Load =
2866  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
2867 
2868  // Prepare argument list to generate call.
2869  ArgListTy Args;
2870  ArgListEntry Entry;
2871  Entry.Node = Load;
2872  Entry.Ty = CallTy;
2873  Args.push_back(Entry);
2874 
2875  // Setup call to __tls_get_addr.
2876  TargetLowering::CallLoweringInfo CLI(DAG);
2877  CLI.setDebugLoc(DL)
2878  .setChain(DAG.getEntryNode())
2879  .setLibCallee(CallingConv::C, CallTy,
2880  DAG.getExternalSymbol("__tls_get_addr", Ty),
2881  std::move(Args));
2882 
2883  return LowerCallTo(CLI).first;
2884 }
2885 
2886 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2887  SelectionDAG &DAG) const {
2888  SDLoc DL(Op);
2889  EVT Ty = Op.getValueType();
2890  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2891  int64_t Offset = N->getOffset();
2892  MVT XLenVT = Subtarget.getXLenVT();
2893 
2894  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
2895 
2896  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2897  CallingConv::GHC)
2898  report_fatal_error("In GHC calling convention TLS is not supported");
2899 
2900  SDValue Addr;
2901  switch (Model) {
2902  case TLSModel::LocalExec:
2903  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2904  break;
2905  case TLSModel::InitialExec:
2906  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2907  break;
2908  case TLSModel::LocalDynamic:
2909  case TLSModel::GeneralDynamic:
2910  Addr = getDynamicTLSAddr(N, DAG);
2911  break;
2912  }
2913 
2914  // In order to maximise the opportunity for common subexpression elimination,
2915  // emit a separate ADD node for the global address offset instead of folding
2916  // it in the global address node. Later peephole optimisations may choose to
2917  // fold it back in when profitable.
2918  if (Offset != 0)
2919  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2920  DAG.getConstant(Offset, DL, XLenVT));
2921  return Addr;
2922 }
2923 
2924 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2925  SDValue CondV = Op.getOperand(0);
2926  SDValue TrueV = Op.getOperand(1);
2927  SDValue FalseV = Op.getOperand(2);
2928  SDLoc DL(Op);
2929  MVT VT = Op.getSimpleValueType();
2930  MVT XLenVT = Subtarget.getXLenVT();
2931 
2932  // Lower vector SELECTs to VSELECTs by splatting the condition.
2933  if (VT.isVector()) {
2934  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
2935  SDValue CondSplat = VT.isScalableVector()
2936  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
2937  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
2938  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
2939  }
2940 
2941  // If the result type is XLenVT and CondV is the output of a SETCC node
2942  // which also operated on XLenVT inputs, then merge the SETCC node into the
2943  // lowered RISCVISD::SELECT_CC to take advantage of the integer
2944  // compare+branch instructions. i.e.:
2945  // (select (setcc lhs, rhs, cc), truev, falsev)
2946  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
2947  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
2948  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
2949  SDValue LHS = CondV.getOperand(0);
2950  SDValue RHS = CondV.getOperand(1);
2951  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
2952  ISD::CondCode CCVal = CC->get();
2953 
2954  // Special case for a select of 2 constants that have a difference of 1.
2955  // Normally this is done by DAGCombine, but if the select is introduced by
2956  // type legalization or op legalization, we miss it. Restricting to SETLT
2957  // case for now because that is what signed saturating add/sub need.
2958  // FIXME: We don't need the condition to be SETLT or even a SETCC,
2959  // but we would probably want to swap the true/false values if the condition
2960  // is SETGE/SETLE to avoid an XORI.
2961  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
2962  CCVal == ISD::SETLT) {
2963  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
2964  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
2965  if (TrueVal - 1 == FalseVal)
2966  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
2967  if (TrueVal + 1 == FalseVal)
2968  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
2969  }
2970 
2971  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2972 
2973  SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
2974  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
2975  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2976  }
2977 
2978  // Otherwise:
2979  // (select condv, truev, falsev)
2980  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
2981  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
2982  SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);
2983 
2984  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
2985 
2986  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2987 }
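// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4) and
// (select (setlt a, b), 4, 5) becomes (sub 5, (setlt a, b)), avoiding a
// SELECT_CC branch sequence entirely.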
2988 
2989 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
2990  SDValue CondV = Op.getOperand(1);
2991  SDLoc DL(Op);
2992  MVT XLenVT = Subtarget.getXLenVT();
2993 
2994  if (CondV.getOpcode() == ISD::SETCC &&
2995  CondV.getOperand(0).getValueType() == XLenVT) {
2996  SDValue LHS = CondV.getOperand(0);
2997  SDValue RHS = CondV.getOperand(1);
2998  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
2999 
3000  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3001 
3002  SDValue TargetCC = DAG.getCondCode(CCVal);
3003  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3004  LHS, RHS, TargetCC, Op.getOperand(2));
3005  }
3006 
3007  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3008  CondV, DAG.getConstant(0, DL, XLenVT),
3009  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3010 }
3011 
3012 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3013  MachineFunction &MF = DAG.getMachineFunction();
3014  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3015 
3016  SDLoc DL(Op);
3017  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3018  getPointerTy(MF.getDataLayout()));
3019 
3020  // vastart just stores the address of the VarArgsFrameIndex slot into the
3021  // memory location argument.
3022  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3023  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3024  MachinePointerInfo(SV));
3025 }
3026 
3027 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3028  SelectionDAG &DAG) const {
3029  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3030  MachineFunction &MF = DAG.getMachineFunction();
3031  MachineFrameInfo &MFI = MF.getFrameInfo();
3032  MFI.setFrameAddressIsTaken(true);
3033  Register FrameReg = RI.getFrameRegister(MF);
3034  int XLenInBytes = Subtarget.getXLen() / 8;
3035 
3036  EVT VT = Op.getValueType();
3037  SDLoc DL(Op);
3038  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3039  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3040  while (Depth--) {
3041  int Offset = -(XLenInBytes * 2);
3042  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3043  DAG.getIntPtrConstant(Offset, DL));
3044  FrameAddr =
3045  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3046  }
3047  return FrameAddr;
3048 }
3049 
3050 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3051  SelectionDAG &DAG) const {
3052  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3053  MachineFunction &MF = DAG.getMachineFunction();
3054  MachineFrameInfo &MFI = MF.getFrameInfo();
3055  MFI.setReturnAddressIsTaken(true);
3056  MVT XLenVT = Subtarget.getXLenVT();
3057  int XLenInBytes = Subtarget.getXLen() / 8;
3058 
3059  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3060  return SDValue();
3061 
3062  EVT VT = Op.getValueType();
3063  SDLoc DL(Op);
3064  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3065  if (Depth) {
3066  int Off = -XLenInBytes;
3067  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3068  SDValue Offset = DAG.getConstant(Off, DL, VT);
3069  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3070  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3071  MachinePointerInfo());
3072  }
3073 
3074  // Return the value of the return address register, marking it an implicit
3075  // live-in.
3076  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3077  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3078 }
3079 
3080 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3081  SelectionDAG &DAG) const {
3082  SDLoc DL(Op);
3083  SDValue Lo = Op.getOperand(0);
3084  SDValue Hi = Op.getOperand(1);
3085  SDValue Shamt = Op.getOperand(2);
3086  EVT VT = Lo.getValueType();
3087 
3088  // if Shamt-XLEN < 0: // Shamt < XLEN
3089  // Lo = Lo << Shamt
3090  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3091  // else:
3092  // Lo = 0
3093  // Hi = Lo << (Shamt-XLEN)
3094 
3095  SDValue Zero = DAG.getConstant(0, DL, VT);
3096  SDValue One = DAG.getConstant(1, DL, VT);
3097  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3098  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3099  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3100  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3101 
3102  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3103  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3104  SDValue ShiftRightLo =
3105  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3106  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3107  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3108  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3109 
3110  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3111 
3112  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3113  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3114 
3115  SDValue Parts[2] = {Lo, Hi};
3116  return DAG.getMergeValues(Parts, DL);
3117 }
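// For example, on RV32 a 64-bit shift left by 40 takes the Shamt >= XLEN arm
// of the selects above: Lo becomes 0 and Hi becomes Lo << 8, computed without
// any branches.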
3118 
3119 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3120  bool IsSRA) const {
3121  SDLoc DL(Op);
3122  SDValue Lo = Op.getOperand(0);
3123  SDValue Hi = Op.getOperand(1);
3124  SDValue Shamt = Op.getOperand(2);
3125  EVT VT = Lo.getValueType();
3126 
3127  // SRA expansion:
3128  // if Shamt-XLEN < 0: // Shamt < XLEN
3129  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3130  // Hi = Hi >>s Shamt
3131  // else:
3132  // Lo = Hi >>s (Shamt-XLEN);
3133  // Hi = Hi >>s (XLEN-1)
3134  //
3135  // SRL expansion:
3136  // if Shamt-XLEN < 0: // Shamt < XLEN
3137  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3138  // Hi = Hi >>u Shamt
3139  // else:
3140  // Lo = Hi >>u (Shamt-XLEN);
3141  // Hi = 0;
3142 
3143  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3144 
3145  SDValue Zero = DAG.getConstant(0, DL, VT);
3146  SDValue One = DAG.getConstant(1, DL, VT);
3147  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3148  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3149  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3150  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3151 
3152  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3153  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3154  SDValue ShiftLeftHi =
3155  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3156  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3157  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3158  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3159  SDValue HiFalse =
3160  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3161 
3162  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3163 
3164  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3165  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3166 
3167  SDValue Parts[2] = {Lo, Hi};
3168  return DAG.getMergeValues(Parts, DL);
3169 }
3170 
3171 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3172 // legal equivalently-sized i8 type, so we can use that as a go-between.
3173 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3174  SelectionDAG &DAG) const {
3175  SDLoc DL(Op);
3176  MVT VT = Op.getSimpleValueType();
3177  SDValue SplatVal = Op.getOperand(0);
3178  // All-zeros or all-ones splats are handled specially.
3179  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3180  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3181  return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3182  }
3183  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3184  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3185  return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3186  }
3187  MVT XLenVT = Subtarget.getXLenVT();
3188  assert(SplatVal.getValueType() == XLenVT &&
3189  "Unexpected type for i1 splat value");
3190  MVT InterVT = VT.changeVectorElementType(MVT::i8);
3191  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3192  DAG.getConstant(1, DL, XLenVT));
3193  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3194  SDValue Zero = DAG.getConstant(0, DL, InterVT);
3195  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3196 }
3197 
3198 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3199 // illegal (currently only vXi64 RV32).
3200 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3201 // them to SPLAT_VECTOR_I64
3202 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3203  SelectionDAG &DAG) const {
3204  SDLoc DL(Op);
3205  MVT VecVT = Op.getSimpleValueType();
3206  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3207  "Unexpected SPLAT_VECTOR_PARTS lowering");
3208 
3209  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3210  SDValue Lo = Op.getOperand(0);
3211  SDValue Hi = Op.getOperand(1);
3212 
3213  if (VecVT.isFixedLengthVector()) {
3214  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3215  SDLoc DL(Op);
3216  SDValue Mask, VL;
3217  std::tie(Mask, VL) =
3218  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3219 
3220  SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3221  return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3222  }
3223 
3224  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3225  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3226  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3227  // If the Hi constant is just Lo's sign bit replicated (Lo sign-extended),
3228  // lower this as a custom node in order to try and match RVV vector/scalar instructions.
3229  if ((LoC >> 31) == HiC)
3230  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3231  }
3232 
3233  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3234  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3235  isa<ConstantSDNode>(Hi.getOperand(1)) &&
3236  Hi.getConstantOperandVal(1) == 31)
3237  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3238 
3239  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
3240  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3241  DAG.getRegister(RISCV::X0, MVT::i64));
3242 }
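// For example, splatting the i64 constant 42 on RV32 arrives here with
// Lo = 42 and Hi = 0; since Hi equals Lo's sign bit replicated, this becomes
// (SPLAT_VECTOR_I64 42), which isel can match to a single vector/scalar move.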
3243 
3244 // Custom-lower extensions from mask vectors by using a vselect either with 1
3245 // for zero/any-extension or -1 for sign-extension:
3246 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
3247 // Note that any-extension is lowered identically to zero-extension.
3248 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
3249  int64_t ExtTrueVal) const {
3250  SDLoc DL(Op);
3251  MVT VecVT = Op.getSimpleValueType();
3252  SDValue Src = Op.getOperand(0);
3253  // Only custom-lower extensions from mask types
3254  assert(Src.getValueType().isVector() &&
3255  Src.getValueType().getVectorElementType() == MVT::i1);
3256 
3257  MVT XLenVT = Subtarget.getXLenVT();
3258  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
3259  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
3260 
3261  if (VecVT.isScalableVector()) {
3262  // Be careful not to introduce illegal scalar types at this stage, and be
3263  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
3264  // illegal and must be expanded. Since we know that the constants are
3265  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
3266  bool IsRV32E64 =
3267  !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
3268 
3269  if (!IsRV32E64) {
3270  SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
3271  SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
3272  } else {
3273  SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
3274  SplatTrueVal =
3275  DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
3276  }
3277 
3278  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
3279  }
3280 
3281  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3282  MVT I1ContainerVT =
3283  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3284 
3285  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
3286 
3287  SDValue Mask, VL;
3288  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3289 
3290  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
3291  SplatTrueVal =
3292  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
3293  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
3294  SplatTrueVal, SplatZero, VL);
3295 
3296  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
3297 }
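// For example, (sign_extend v4i1:%m to v4i32) becomes a select between a
// splat of -1 and a splat of 0 under %m (a vmerge once selected), while
// zero- and any-extension use +1 in place of -1.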
3298 
3299 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3300  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3301  MVT ExtVT = Op.getSimpleValueType();
3302  // Only custom-lower extensions from fixed-length vector types.
3303  if (!ExtVT.isFixedLengthVector())
3304  return Op;
3305  MVT VT = Op.getOperand(0).getSimpleValueType();
3306  // Grab the canonical container type for the extended type. Infer the smaller
3307  // type from that to ensure the same number of vector elements, as we know
3308  // the LMUL will be sufficient to hold the smaller type.
3309  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
3310  // Get the extended container type manually to ensure the same number of
3311  // vector elements between source and dest.
3312  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3313  ContainerExtVT.getVectorElementCount());
3314 
3315  SDValue Op1 =
3316  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3317 
3318  SDLoc DL(Op);
3319  SDValue Mask, VL;
3320  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3321 
3322  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3323 
3324  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3325 }
3326 
3327 // Custom-lower truncations from vectors to mask vectors by using a mask and a
3328 // setcc operation:
3329 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
3330 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
3331  SelectionDAG &DAG) const {
3332  SDLoc DL(Op);
3333  EVT MaskVT = Op.getValueType();
3334  // Only expect to custom-lower truncations to mask types
3335  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
3336  "Unexpected type for vector mask lowering");
3337  SDValue Src = Op.getOperand(0);
3338  MVT VecVT = Src.getSimpleValueType();
3339 
3340  // If this is a fixed vector, we need to convert it to a scalable vector.
3341  MVT ContainerVT = VecVT;
3342  if (VecVT.isFixedLengthVector()) {
3343  ContainerVT = getContainerForFixedLengthVector(VecVT);
3344  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3345  }
3346 
3347  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
3348  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
3349 
3350  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
3351  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
3352 
3353  if (VecVT.isScalableVector()) {
3354  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
3355  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
3356  }
3357 
3358  SDValue Mask, VL;
3359  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3360 
3361  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3362  SDValue Trunc =
3363  DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
3364  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
3365  DAG.getCondCode(ISD::SETNE), Mask, VL);
3366  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
3367 }
3368 
3369 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
3370 // first position of a vector, and that vector is slid up to the insert index.
3371 // By limiting the active vector length to index+1 and merging with the
3372 // original vector (with an undisturbed tail policy for elements >= VL), we
3373 // achieve the desired result of leaving all elements untouched except the one
3374 // at VL-1, which is replaced with the desired value.
3375 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3376  SelectionDAG &DAG) const {
3377  SDLoc DL(Op);
3378  MVT VecVT = Op.getSimpleValueType();
3379  SDValue Vec = Op.getOperand(0);
3380  SDValue Val = Op.getOperand(1);
3381  SDValue Idx = Op.getOperand(2);
3382 
3383  if (VecVT.getVectorElementType() == MVT::i1) {
3384  // FIXME: For now we just promote to an i8 vector and insert into that,
3385  // but this is probably not optimal.
3386  MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
3387  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3388  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
3389  return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
3390  }
3391 
3392  MVT ContainerVT = VecVT;
3393  // If the operand is a fixed-length vector, convert to a scalable one.
3394  if (VecVT.isFixedLengthVector()) {
3395  ContainerVT = getContainerForFixedLengthVector(VecVT);
3396  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3397  }
3398 
3399  MVT XLenVT = Subtarget.getXLenVT();
3400 
3401  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3402  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
3403  // Even i64-element vectors on RV32 can be lowered without scalar
3404  // legalization if the most-significant 32 bits of the value are not affected
3405  // by the sign-extension of the lower 32 bits.
3406  // TODO: We could also catch sign extensions of a 32-bit value.
3407  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
3408  const auto *CVal = cast<ConstantSDNode>(Val);
3409  if (isInt<32>(CVal->getSExtValue())) {
3410  IsLegalInsert = true;
3411  Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3412  }
3413  }
3414 
3415  SDValue Mask, VL;
3416  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3417 
3418  SDValue ValInVec;
3419 
3420  if (IsLegalInsert) {
3421  unsigned Opc =
3422  VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
3423  if (isNullConstant(Idx)) {
3424  Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
3425  if (!VecVT.isFixedLengthVector())
3426  return Vec;
3427  return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
3428  }
3429  ValInVec =
3430  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
3431  } else {
3432  // On RV32, i64-element vectors must be specially handled to place the
3433  // value at element 0, by using two vslide1up instructions in sequence on
3434  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
3435  // this.
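  // The sequence built below is conceptually (symbolic operands, VL = 2,
  // SEW = 32):
  //   vmv.v.x      vTmp, zero        ; start from a zero splat
  //   vslide1up.vx vTmp2, vTmp, xHi  ; hi word becomes element 0
  //   vslide1up.vx vTmp3, vTmp2, xLo ; lo word slides in underneath it
  // leaving {lo, hi} in the first two i32 lanes, i.e. the i64 value in lane 0
  // once the result is bitcast back to the i64 container type.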
3436  SDValue One = DAG.getConstant(1, DL, XLenVT);
3437  SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
3438  SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
3439  MVT I32ContainerVT =
3440  MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
3441  SDValue I32Mask =
3442  getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
3443  // Limit the active VL to two.
3444  SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
3445  // Note: We can't pass an undef to the first VSLIDE1UP_VL since an untied
3446  // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
3447  ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
3448  InsertI64VL);
3449  // First slide in the hi value, then slide the lo value in underneath it.
3450  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3451  ValHi, I32Mask, InsertI64VL);
3452  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3453  ValLo, I32Mask, InsertI64VL);
3454  // Bitcast back to the right container type.
3455  ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
3456  }
3457 
3458  // Now that the value is in a vector, slide it into position.
3459  SDValue InsertVL =
3460  DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
3461  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3462  ValInVec, Idx, Mask, InsertVL);
3463  if (!VecVT.isFixedLengthVector())
3464  return Slideup;
3465  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3466 }
3467 
3468 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
3469 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
3470 // types this is done using VMV_X_S to allow us to glean information about the
3471 // sign bits of the result.
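// As an illustrative sketch (symbolic registers, constant index 3), the
// integer case corresponds roughly to:
//   vsetivli      zero, 1, e32, m1, ta, mu   ; VL = 1 is all we need
//   vslidedown.vi vTmp, vSrc, 3
//   vmv.x.s       aDest, vTmp                ; element 0 into a GPR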
3472 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3473  SelectionDAG &DAG) const {
3474  SDLoc DL(Op);
3475  SDValue Idx = Op.getOperand(1);
3476  SDValue Vec = Op.getOperand(0);
3477  EVT EltVT = Op.getValueType();
3478  MVT VecVT = Vec.getSimpleValueType();
3479  MVT XLenVT = Subtarget.getXLenVT();
3480 
3481  if (VecVT.getVectorElementType() == MVT::i1) {
3482  // FIXME: For now we just promote to an i8 vector and extract from that,
3483  // but this is probably not optimal.
3484  MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
3485  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3486  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
3487  }
3488 
3489  // If this is a fixed vector, we need to convert it to a scalable vector.
3490  MVT ContainerVT = VecVT;
3491  if (VecVT.isFixedLengthVector()) {
3492  ContainerVT = getContainerForFixedLengthVector(VecVT);
3493  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3494  }
3495 
3496  // If the index is 0, the vector is already in the right position.
3497  if (!isNullConstant(Idx)) {
3498  // Use a VL of 1 to avoid processing more elements than we need.
3499  SDValue VL = DAG.getConstant(1, DL, XLenVT);
3500  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3501  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3502  Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3503  DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3504  }
3505 
3506  if (!EltVT.isInteger()) {
3507  // Floating-point extracts are handled in TableGen.
3508  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
3509  DAG.getConstant(0, DL, XLenVT));
3510  }
3511 
3512  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3513  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
3514 }
3515 
3516 // Some RVV intrinsics may claim that they want an integer operand to be
3517 // promoted or expanded.
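// For example (illustrative), a vadd.vx-style intrinsic takes a scalar
// operand: on RV64 an i32 scalar is promoted to the XLenVT i64, while on
// RV32 an i64 scalar for a SEW=64 operation is either truncated (when it is
// a sign-extended 32-bit constant) or materialised as a splat vector below.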
3518 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
3519  const RISCVSubtarget &Subtarget) {
3520  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3521  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3522  "Unexpected opcode");
3523 
3524  if (!Subtarget.hasStdExtV())
3525  return SDValue();
3526 
3527  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3528  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
3529  SDLoc DL(Op);
3530 
3531  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
3532  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3533  if (!II || !II->SplatOperand)
3534  return SDValue();
3535 
3536  unsigned SplatOp = II->SplatOperand + HasChain;
3537  assert(SplatOp < Op.getNumOperands());
3538 
3539  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
3540  SDValue &ScalarOp = Operands[SplatOp];
3541  MVT OpVT = ScalarOp.getSimpleValueType();
3542  MVT XLenVT = Subtarget.getXLenVT();
3543 
3544  // If this isn't a scalar, or its type is XLenVT we're done.
3545  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
3546  return SDValue();
3547 
3548  // Simplest case is that the operand needs to be promoted to XLenVT.
3549  if (OpVT.bitsLT(XLenVT)) {
3550  // If the operand is a constant, sign extend to increase our chances
3551  // of being able to use a .vi instruction. ANY_EXTEND would become a
3552  // zero extend and the simm5 check in isel would fail.
3553  // FIXME: Should we ignore the upper bits in isel instead?
3554  unsigned ExtOpc =
3555  isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3556  ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
3557  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3558  }
3559 
3560  // Use the previous operand to get the vXi64 VT. The result might be a mask
3561  // VT for compares. Using the previous operand assumes that the previous
3562  // operand will never have a smaller element size than a scalar operand and
3563  // that a widening operation never uses SEW=64.
3564  // NOTE: If this fails the below assert, we can probably just find the
3565  // element count from any operand or result and use it to construct the VT.
3566  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
3567  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
3568 
3569  // The more complex case is when the scalar is larger than XLenVT.
3570  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
3571  VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
3572 
3573  // If this is a sign-extended 32-bit constant, we can truncate it and rely
3574  // on the instruction to sign-extend since SEW>XLEN.
3575  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
3576  if (isInt<32>(CVal->getSExtValue())) {
3577  ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3578  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3579  }
3580  }
3581 
3582  // We need to convert the scalar to a splat vector.
3583  // FIXME: Can we implicitly truncate the scalar if it is known to
3584  // be sign extended?
3585  // VL should be the last operand.
3586  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
3587  assert(VL.getValueType() == XLenVT);
3588  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
3589  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3590 }
3591 
3592 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3593  SelectionDAG &DAG) const {
3594  unsigned IntNo = Op.getConstantOperandVal(0);
3595  SDLoc DL(Op);
3596  MVT XLenVT = Subtarget.getXLenVT();
3597 
3598  switch (IntNo) {
3599  default:
3600  break; // Don't custom lower most intrinsics.
3601  case Intrinsic::thread_pointer: {
3602  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3603  return DAG.getRegister(RISCV::X4, PtrVT);
3604  }
3605  case Intrinsic::riscv_orc_b:
3606  // Lower to the GORCI encoding for orc.b.
3607  return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
3608  DAG.getConstant(7, DL, XLenVT));
3609  case Intrinsic::riscv_grev:
3610  case Intrinsic::riscv_gorc: {
3611  unsigned Opc =
3612  IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
3613  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3614  }
3615  case Intrinsic::riscv_shfl:
3616  case Intrinsic::riscv_unshfl: {
3617  unsigned Opc =
3618  IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
3619  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3620  }
3621  case Intrinsic::riscv_bcompress:
3622  case Intrinsic::riscv_bdecompress: {
3623  unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
3624  : RISCVISD::BDECOMPRESS;
3625  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3626  }
3627  case Intrinsic::riscv_vmv_x_s:
3628  assert(Op.getValueType() == XLenVT && "Unexpected VT!");
3629  return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
3630  Op.getOperand(1));
3631  case Intrinsic::riscv_vmv_v_x:
3632  return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
3633  Op.getSimpleValueType(), DL, DAG, Subtarget);
3634  case Intrinsic::riscv_vfmv_v_f:
3635  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
3636  Op.getOperand(1), Op.getOperand(2));
3637  case Intrinsic::riscv_vmv_s_x: {
3638  SDValue Scalar = Op.getOperand(2);
3639 
3640  if (Scalar.getValueType().bitsLE(XLenVT)) {
3641  Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
3642  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
3643  Op.getOperand(1), Scalar, Op.getOperand(3));
3644  }
3645 
3646  assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
3647 
3648  // This is an i64 value that lives in two scalar registers. We have to
3649  // insert this in a convoluted way. First we build a vXi64 splat containing
3650  // the two values that we assemble using some bit math. Next we'll use
3651  // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
3652  // to merge element 0 from our splat into the source vector.
3653  // FIXME: This is probably not the best way to do this, but it is
3654  // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
3655  // point.
3656  // sw lo, (a0)
3657  // sw hi, 4(a0)
3658  // vlse vX, (a0)
3659  //
3660  // vid.v vVid
3661  // vmseq.vx mMask, vVid, 0
3662  // vmerge.vvm vDest, vSrc, vVal, mMask
3663  MVT VT = Op.getSimpleValueType();
3664  SDValue Vec = Op.getOperand(1);
3665  SDValue VL = Op.getOperand(3);
3666 
3667  SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
3668  SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
3669  DAG.getConstant(0, DL, MVT::i32), VL);
3670 
3671  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3672  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3673  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3674  SDValue SelectCond =
3675  DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
3676  DAG.getCondCode(ISD::SETEQ), Mask, VL);
3677  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
3678  Vec, VL);
3679  }
3680  case Intrinsic::riscv_vslide1up:
3681  case Intrinsic::riscv_vslide1down:
3682  case Intrinsic::riscv_vslide1up_mask:
3683  case Intrinsic::riscv_vslide1down_mask: {
3684  // We need to special case these when the scalar is larger than XLen.
3685  unsigned NumOps = Op.getNumOperands();
3686  bool IsMasked = NumOps == 6;
3687  unsigned OpOffset = IsMasked ? 1 : 0;
3688  SDValue Scalar = Op.getOperand(2 + OpOffset);
3689  if (Scalar.getValueType().bitsLE(XLenVT))
3690  break;
3691 
3692  // Splatting a sign extended constant is fine.
3693  if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
3694  if (isInt<32>(CVal->getSExtValue()))
3695  break;
3696 
3697  MVT VT = Op.getSimpleValueType();
3698  assert(VT.getVectorElementType() == MVT::i64 &&
3699  Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3700 
3701  // Convert the vector source to the equivalent nxvXi32 vector.
3702  MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3703  SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3704 
3705  SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3706  DAG.getConstant(0, DL, XLenVT));
3707  SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3708  DAG.getConstant(1, DL, XLenVT));
3709 
3710  // Double the VL since we halved SEW.
3711  SDValue VL = Op.getOperand(NumOps - 1);
3712  SDValue I32VL =
3713  DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3714 
3715  MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3716  SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3717 
3718  // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3719  // instructions.
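  // For slide1up the hi word goes in first so the lo word ends up below it;
  // for slide1down the order is reversed (lo first, then hi). Either way the
  // two i32 lanes form the i64 value in little-endian lane order once the
  // result is bitcast back to the i64 type.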
3720  if (IntNo == Intrinsic::riscv_vslide1up ||
3721  IntNo == Intrinsic::riscv_vslide1up_mask) {
3722  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3723  I32Mask, I32VL);
3724  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
3725  I32Mask, I32VL);
3726  } else {
3727  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
3728  I32Mask, I32VL);
3729  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
3730  I32Mask, I32VL);
3731  }
3732 
3733  // Convert back to nxvXi64.
3734  Vec = DAG.getBitcast(VT, Vec);
3735 
3736  if (!IsMasked)
3737  return Vec;
3738 
3739  // Apply mask after the operation.
3740  SDValue Mask = Op.getOperand(NumOps - 2);
3741  SDValue MaskedOff = Op.getOperand(1);
3742  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
3743  }
3744  }
3745 
3746  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3747 }
3748 
3749 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
3750  SelectionDAG &DAG) const {
3751  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3752 }
3753 
3754 static MVT getLMUL1VT(MVT VT) {
3755  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3756  "Unexpected vector MVT");
3757  return MVT::getScalableVectorVT(
3758  VT.getVectorElementType(),
3759  RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3760 }
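// For example (with the standard 64-bit RVV block size), getLMUL1VT maps
// nxv8i32 or nxv16i32 to nxv2i32 and nxv8i64 to nxv1i64, i.e. the LMUL=1
// type used below as the container for the scalar reduction result.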
3761 
3762 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
3763  switch (ISDOpcode) {
3764  default:
3765  llvm_unreachable("Unhandled reduction");
3766  case ISD::VECREDUCE_ADD:
3767  return RISCVISD::VECREDUCE_ADD_VL;
3768  case ISD::VECREDUCE_UMAX:
3769  return RISCVISD::VECREDUCE_UMAX_VL;
3770  case ISD::VECREDUCE_SMAX:
3771  return RISCVISD::VECREDUCE_SMAX_VL;
3772  case ISD::VECREDUCE_UMIN:
3773  return RISCVISD::VECREDUCE_UMIN_VL;
3774  case ISD::VECREDUCE_SMIN:
3775  return RISCVISD::VECREDUCE_SMIN_VL;
3776  case ISD::VECREDUCE_AND:
3777  return RISCVISD::VECREDUCE_AND_VL;
3778  case ISD::VECREDUCE_OR:
3779  return RISCVISD::VECREDUCE_OR_VL;
3780  case ISD::VECREDUCE_XOR:
3781  return RISCVISD::VECREDUCE_XOR_VL;
3782  }
3783 }
3784 
3785 SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
3786  SelectionDAG &DAG) const {
3787  SDLoc DL(Op);
3788  SDValue Vec = Op.getOperand(0);
3789  MVT VecVT = Vec.getSimpleValueType();
3790  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
3791  Op.getOpcode() == ISD::VECREDUCE_OR ||
3792  Op.getOpcode() == ISD::VECREDUCE_XOR) &&
3793  "Unexpected reduction lowering");
3794 
3795  MVT XLenVT = Subtarget.getXLenVT();
3796  assert(Op.getValueType() == XLenVT &&
3797  "Expected reduction output to be legalized to XLenVT");
3798 
3799  MVT ContainerVT = VecVT;
3800  if (VecVT.isFixedLengthVector()) {
3801  ContainerVT = getContainerForFixedLengthVector(VecVT);
3802  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3803  }
3804 
3805  SDValue Mask, VL;
3806  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3807  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3808 
3809  switch (Op.getOpcode()) {
3810  default:
3811  llvm_unreachable("Unhandled reduction");
3812  case ISD::VECREDUCE_AND:
3813  // vpopc ~x == 0
3814  Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
3815  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3816  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
3817  case ISD::VECREDUCE_OR:
3818  // vpopc x != 0
3819  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3820  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3821  case ISD::VECREDUCE_XOR: {
3822  // ((vpopc x) & 1) != 0
3823  SDValue One = DAG.getConstant(1, DL, XLenVT);
3824  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3825  Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
3826  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3827  }
3828  }
3829 }
3830 
3831 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
3832  SelectionDAG &DAG) const {
3833  SDLoc DL(Op);
3834  SDValue Vec = Op.getOperand(0);
3835  EVT VecEVT = Vec.getValueType();
3836 
3837  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
3838 
3839  // Due to ordering in legalize types we may have a vector type that needs to
3840  // be split. Do that manually so we can get down to a legal type.
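  // For example, an over-wide fixed-length vecreduce_add is repeatedly split
  // in half, the halves combined element-wise with the base opcode (ISD::ADD
  // in that case), and the reduction then proceeds on the narrower, legal
  // type.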
3841  while (getTypeAction(*DAG.getContext(), VecEVT) ==
3842  TargetLowering::TypeSplitVector) {
3843  SDValue Lo, Hi;
3844  std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
3845  VecEVT = Lo.getValueType();
3846  Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
3847  }
3848 
3849  // TODO: The type may need to be widened rather than split. Or widened before
3850  // it can be split.
3851  if (!isTypeLegal(VecEVT))
3852  return SDValue();
3853 
3854  MVT VecVT = VecEVT.getSimpleVT();
3855  MVT VecEltVT = VecVT.getVectorElementType();
3856  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
3857 
3858  MVT ContainerVT = VecVT;
3859  if (VecVT.isFixedLengthVector()) {
3860  ContainerVT = getContainerForFixedLengthVector(VecVT);
3861  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3862  }
3863 
3864  MVT M1VT = getLMUL1VT(ContainerVT);
3865 
3866  SDValue Mask, VL;
3867  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3868 
3869  // FIXME: This is a VLMAX splat which might be too large and can prevent
3870  // vsetvli removal.
3871  SDValue NeutralElem =
3872  DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
3873  SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
3874  SDValue Reduction =
3875  DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
3876  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3877  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3878  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
3879 }
3880 
3881 // Given a reduction op, this function returns the matching reduction opcode,
3882 // the vector SDValue and the scalar SDValue required to lower this to a
3883 // RISCVISD node.
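// For example, a plain VECREDUCE_FADD reduces the vector operand against a
// splat of the neutral element (typically -0.0 for fadd), whereas the
// ordered VECREDUCE_SEQ_FADD uses its explicit start value (operand 0) as
// the scalar operand.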
3884 static std::tuple<unsigned, SDValue, SDValue>
3885 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
3886  SDLoc DL(Op);
3887  auto Flags = Op->getFlags();
3888  unsigned Opcode = Op.getOpcode();
3889  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
3890  switch (Opcode) {
3891  default:
3892  llvm_unreachable("Unhandled reduction");
3893  case ISD::VECREDUCE_FADD:
3894  return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
3895  DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3896  case ISD::VECREDUCE_SEQ_FADD:
3897  return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
3898  Op.getOperand(0));
3899  case ISD::VECREDUCE_FMIN:
3900  return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
3901  DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3902  case ISD::VECREDUCE_FMAX:
3903  return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
3904  DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
3905  }
3906 }
3907 
3908 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
3909  SelectionDAG &DAG) const {
3910  SDLoc DL(Op);
3911  MVT VecEltVT = Op.getSimpleValueType();
3912 
3913  unsigned RVVOpcode;
3914  SDValue VectorVal, ScalarVal;
3915  std::tie(RVVOpcode, VectorVal, ScalarVal) =
3916  getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
3917  MVT VecVT = VectorVal.getSimpleValueType();
3918 
3919  MVT ContainerVT = VecVT;
3920  if (VecVT.isFixedLengthVector()) {
3921  ContainerVT = getContainerForFixedLengthVector(VecVT);
3922  VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
3923  }
3924 
3925  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
3926 
3927  SDValue Mask, VL;
3928  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3929 
3930  // FIXME: This is a VLMAX splat which might be too large and can prevent
3931  // vsetvli removal.
3932  SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
3933  SDValue Reduction =
3934  DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
3935  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3936  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3937 }
3938 
3939 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
3940  SelectionDAG &DAG) const {
3941  SDValue Vec = Op.getOperand(0);
3942  SDValue SubVec = Op.getOperand(1);
3943  MVT VecVT = Vec.getSimpleValueType();
3944  MVT SubVecVT = SubVec.getSimpleValueType();
3945 
3946  SDLoc DL(Op);
3947  MVT XLenVT = Subtarget.getXLenVT();
3948  unsigned OrigIdx = Op.getConstantOperandVal(2);
3949  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3950 
3951  // We don't have the ability to slide mask vectors up indexed by their i1
3952  // elements; the smallest we can do is i8. Often we are able to bitcast to
3953  // equivalent i8 vectors. Note that when inserting a fixed-length vector
3954  // into a scalable one, we might not necessarily have enough scalable