1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
30 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/KnownBits.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "riscv-lower"
42 
43 STATISTIC(NumTailCalls, "Number of tail calls");
44 
45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
46                                          const RISCVSubtarget &STI)
47  : TargetLowering(TM), Subtarget(STI) {
48 
49  if (Subtarget.isRV32E())
50  report_fatal_error("Codegen not yet implemented for RV32E");
51 
52  RISCVABI::ABI ABI = Subtarget.getTargetABI();
53  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
54 
55  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
56      !Subtarget.hasStdExtF()) {
57  errs() << "Hard-float 'f' ABI can't be used for a target that "
58  "doesn't support the F instruction set extension (ignoring "
59  "target-abi)\n";
61  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
62  !Subtarget.hasStdExtD()) {
63  errs() << "Hard-float 'd' ABI can't be used for a target that "
64  "doesn't support the D instruction set extension (ignoring "
65  "target-abi)\n";
67  }
68 
69  switch (ABI) {
70  default:
71  report_fatal_error("Don't know how to lower this ABI");
72  case RISCVABI::ABI_ILP32:
73  case RISCVABI::ABI_ILP32F:
74  case RISCVABI::ABI_ILP32D:
75  case RISCVABI::ABI_LP64:
76  case RISCVABI::ABI_LP64F:
77  case RISCVABI::ABI_LP64D:
78  break;
79  }
80 
81  MVT XLenVT = Subtarget.getXLenVT();
82 
83  // Set up the register classes.
84  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
85 
86  if (Subtarget.hasStdExtZfh())
87  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
88  if (Subtarget.hasStdExtF())
89  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
90  if (Subtarget.hasStdExtD())
91  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
92 
93  static const MVT::SimpleValueType BoolVecVTs[] = {
94      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
95      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
96  static const MVT::SimpleValueType IntVecVTs[] = {
97      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
98      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
99      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
100     MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
101     MVT::nxv4i64, MVT::nxv8i64};
102  static const MVT::SimpleValueType F16VecVTs[] = {
103      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
104      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
105  static const MVT::SimpleValueType F32VecVTs[] = {
106      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
107  static const MVT::SimpleValueType F64VecVTs[] = {
108      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
109 
110  if (Subtarget.hasStdExtV()) {
111  auto addRegClassForRVV = [this](MVT VT) {
112  unsigned Size = VT.getSizeInBits().getKnownMinValue();
113  assert(Size <= 512 && isPowerOf2_32(Size));
114  const TargetRegisterClass *RC;
115  if (Size <= 64)
116  RC = &RISCV::VRRegClass;
117  else if (Size == 128)
118  RC = &RISCV::VRM2RegClass;
119  else if (Size == 256)
120  RC = &RISCV::VRM4RegClass;
121  else
122  RC = &RISCV::VRM8RegClass;
123 
124  addRegisterClass(VT, RC);
125  };
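     // For example, with RVVBitsPerBlock == 64: nxv1i64 and any fractional-LMUL
     // type (<= 64 known-min bits) land in VR, nxv2i64 (128 bits) in VRM2,
     // nxv4i64 (256 bits) in VRM4 and nxv8i64 (512 bits) in VRM8.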
126 
127  for (MVT VT : BoolVecVTs)
128  addRegClassForRVV(VT);
129  for (MVT VT : IntVecVTs)
130  addRegClassForRVV(VT);
131 
132  if (Subtarget.hasStdExtZfh())
133  for (MVT VT : F16VecVTs)
134  addRegClassForRVV(VT);
135 
136  if (Subtarget.hasStdExtF())
137  for (MVT VT : F32VecVTs)
138  addRegClassForRVV(VT);
139 
140  if (Subtarget.hasStdExtD())
141  for (MVT VT : F64VecVTs)
142  addRegClassForRVV(VT);
143 
144  if (Subtarget.useRVVForFixedLengthVectors()) {
145  auto addRegClassForFixedVectors = [this](MVT VT) {
146  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
147  const TargetRegisterClass *RC;
148  if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
149  RC = &RISCV::VRRegClass;
150  else if (LMul == 2)
151  RC = &RISCV::VRM2RegClass;
152  else if (LMul == 4)
153  RC = &RISCV::VRM4RegClass;
154  else if (LMul == 8)
155  RC = &RISCV::VRM8RegClass;
156  else
157  llvm_unreachable("Unexpected LMul!");
158 
159  addRegisterClass(VT, RC);
160  };
161      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
162        if (useRVVForFixedLengthVectorVT(VT))
163  addRegClassForFixedVectors(VT);
164 
165      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
166        if (useRVVForFixedLengthVectorVT(VT))
167  addRegClassForFixedVectors(VT);
168  }
169  }
170 
171  // Compute derived properties from the register classes.
172  computeRegisterProperties(STI.getRegisterInfo());
173 
174  setStackPointerRegisterToSaveRestore(RISCV::X2);
175 
176  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
177  setLoadExtAction(N, XLenVT, MVT::i1, Promote);
178 
179  // TODO: add all necessary setOperationAction calls.
181 
186 
189 
194 
196  if (!Subtarget.hasStdExtZbb()) {
199  }
200 
201  if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit())
203 
204  if (Subtarget.is64Bit()) {
210 
215  }
216 
217  if (!Subtarget.hasStdExtM()) {
225  } else {
226  if (Subtarget.is64Bit()) {
229 
239  } else {
241  }
242  }
243 
248 
252 
253  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
254  if (Subtarget.is64Bit()) {
257  }
258  } else {
261  }
262 
263  if (Subtarget.hasStdExtZbp()) {
264  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
265  // more combining.
268 
269  if (Subtarget.is64Bit()) {
272  }
273  } else {
274  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
275  // pattern match it directly in isel.
276  setOperationAction(ISD::BSWAP, XLenVT,
277                     Subtarget.hasStdExtZbb() ? Legal : Expand);
278  }
279 
280  if (Subtarget.hasStdExtZbb()) {
285 
286  if (Subtarget.is64Bit()) {
291  }
292  } else {
296  }
297 
298  if (Subtarget.hasStdExtZbt()) {
302 
303  if (Subtarget.is64Bit()) {
306  }
307  } else {
309  }
310 
311  ISD::CondCode FPCCToExpand[] = {
315 
316  ISD::NodeType FPOpToExpand[] = {
319 
320  if (Subtarget.hasStdExtZfh())
322 
323  if (Subtarget.hasStdExtZfh()) {
326  for (auto CC : FPCCToExpand)
331  for (auto Op : FPOpToExpand)
333  }
334 
335  if (Subtarget.hasStdExtF()) {
338  for (auto CC : FPCCToExpand)
343  for (auto Op : FPOpToExpand)
347  }
348 
349  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
351 
352  if (Subtarget.hasStdExtD()) {
355  for (auto CC : FPCCToExpand)
362  for (auto Op : FPOpToExpand)
366  }
367 
368  if (Subtarget.is64Bit()) {
373  }
374 
379 
381 
382  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
383  // Unfortunately this can't be determined just from the ISA naming string.
384  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
385                     Subtarget.is64Bit() ? Legal : Custom);
386 
390 
391  if (Subtarget.hasStdExtA()) {
394  } else {
396  }
397 
399 
400  if (Subtarget.hasStdExtV()) {
402 
404 
405  // RVV intrinsics may have illegal operands.
406  // We also need to custom legalize vmv.x.s.
415 
417 
418  if (!Subtarget.is64Bit()) {
419  // We must custom-lower certain vXi64 operations on RV32 due to the vector
420  // element type being illegal.
423 
432  }
433 
434  for (MVT VT : BoolVecVTs) {
436 
437  // Mask VTs are custom-expanded into a series of standard nodes
441 
443 
447 
448  // Expand all extending loads to types larger than this, and truncating
449  // stores from types larger than this.
450  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
451  setTruncStoreAction(OtherVT, VT, Expand);
452  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
453  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
454  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
455  }
456  }
457 
458  for (MVT VT : IntVecVTs) {
461 
466 
469 
470  // Custom-lower extensions and truncations from/to mask types.
474 
475  // RVV has native int->float & float->int conversions where the
476  // element type sizes are within one power-of-two of each other. Any
477  // wider distances between type sizes have to be lowered as sequences
478  // which progressively narrow the gap in stages.
483 
484  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
485  // nodes which truncate by one power of two at a time.
487 
488  // Custom-lower insert/extract operations to simplify patterns.
491 
492  // Custom-lower reduction operations to set up the corresponding custom
493  // nodes' operands.
502 
507 
511 
514 
515  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
516  setTruncStoreAction(VT, OtherVT, Expand);
517  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
518  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
519  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
520  }
521  }
522 
523  // Expand various CCs to best match the RVV ISA, which natively supports UNE
524  // but no other unordered comparisons, and supports all ordered comparisons
525  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
526  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
527  // and we pattern-match those back to the "original", swapping operands once
528  // more. This way we catch both operations and both "vf" and "fv" forms with
529  // fewer patterns.
530  ISD::CondCode VFPCCToExpand[] = {
534  };
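   // For example, (setcc x, y, setogt) is expanded to (setcc y, x, setolt);
   // isel then swaps the operands back, so one set of patterns can select
   // vmflt.vv, or the single vmfgt.vf form when the scalar ends up on the RHS.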
535 
536  // Sets common operation actions on RVV floating-point vector types.
537  const auto SetCommonVFPActions = [&](MVT VT) {
539  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
540  // sizes are within one power-of-two of each other. Therefore conversions
541  // between vXf16 and vXf64 must be lowered as sequences which convert via
542  // vXf32.
545  // Custom-lower insert/extract operations to simplify patterns.
548  // Expand various condition codes (explained above).
549  for (auto CC : VFPCCToExpand)
550  setCondCodeAction(CC, VT, Expand);
551 
555 
560 
564 
566  };
567 
568  // Sets common extload/truncstore actions on RVV floating-point vector
569  // types.
570  const auto SetCommonVFPExtLoadTruncStoreActions =
571  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
572  for (auto SmallVT : SmallerVTs) {
573  setTruncStoreAction(VT, SmallVT, Expand);
574  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
575  }
576  };
577 
578  if (Subtarget.hasStdExtZfh())
579  for (MVT VT : F16VecVTs)
580  SetCommonVFPActions(VT);
581 
582  for (MVT VT : F32VecVTs) {
583  if (Subtarget.hasStdExtF())
584  SetCommonVFPActions(VT);
585  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
586  }
587 
588  for (MVT VT : F64VecVTs) {
589  if (Subtarget.hasStdExtD())
590  SetCommonVFPActions(VT);
591  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
592  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
593  }
594 
595  if (Subtarget.useRVVForFixedLengthVectors()) {
597  if (!useRVVForFixedLengthVectorVT(VT))
598  continue;
599 
600  // By default everything must be expanded.
601  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
602    setOperationAction(Op, VT, Expand);
603  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
604  setTruncStoreAction(VT, OtherVT, Expand);
605  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
606  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
607  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
608  }
609 
610  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
613 
616 
618 
621 
623 
625 
627 
631 
632  // Operations below are different for between masks and other vectors.
633  if (VT.getVectorElementType() == MVT::i1) {
637  continue;
638  }
639 
642 
660 
666 
669 
674 
676 
680 
681  // Custom-lower reduction operations to set up the corresponding custom
682  // nodes' operands.
688  }
689 
691  if (!useRVVForFixedLengthVectorVT(VT))
692  continue;
693 
694  // By default everything must be expanded.
695  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
696    setOperationAction(Op, VT, Expand);
697  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
698  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
699  setTruncStoreAction(VT, OtherVT, Expand);
700  }
701 
702  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
705 
710 
726 
729 
730  for (auto CC : VFPCCToExpand)
731  setCondCodeAction(CC, VT, Expand);
732 
734 
736 
739  }
740 
741  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
749  }
750  }
751 
752  // Function alignments.
753  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
754  setMinFunctionAlignment(FunctionAlignment);
755  setPrefFunctionAlignment(FunctionAlignment);
756 
758 
759  // Jumps are expensive, compared to logic
761 
762  // We can use any register for comparisons
764 
765  if (Subtarget.hasStdExtZbp()) {
767  }
768  if (Subtarget.hasStdExtV()) {
772  }
773 }
774 
775 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
776                                             LLVMContext &Context,
777                                             EVT VT) const {
778  if (!VT.isVector())
779  return getPointerTy(DL);
780  if (Subtarget.hasStdExtV() &&
781      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
782    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
783  return VT.changeVectorElementTypeToInteger();
784 }
785 
786 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
787                                              const CallInst &I,
788  MachineFunction &MF,
789  unsigned Intrinsic) const {
790  switch (Intrinsic) {
791  default:
792  return false;
793  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
794  case Intrinsic::riscv_masked_atomicrmw_add_i32:
795  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
796  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
797  case Intrinsic::riscv_masked_atomicrmw_max_i32:
798  case Intrinsic::riscv_masked_atomicrmw_min_i32:
799  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
800  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
801  case Intrinsic::riscv_masked_cmpxchg_i32:
802  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
803  Info.opc = ISD::INTRINSIC_W_CHAIN;
804  Info.memVT = MVT::getVT(PtrTy->getElementType());
805  Info.ptrVal = I.getArgOperand(0);
806  Info.offset = 0;
807  Info.align = Align(4);
808  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
809               MachineMemOperand::MOVolatile;
810  return true;
811  }
812 }
813 
814 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
815                                                 const AddrMode &AM, Type *Ty,
816  unsigned AS,
817  Instruction *I) const {
818  // No global is ever allowed as a base.
819  if (AM.BaseGV)
820  return false;
821 
822  // Require a 12-bit signed offset.
823  if (!isInt<12>(AM.BaseOffs))
824  return false;
825 
826  switch (AM.Scale) {
827  case 0: // "r+i" or just "i", depending on HasBaseReg.
828  break;
829  case 1:
830  if (!AM.HasBaseReg) // allow "r+i".
831  break;
832  return false; // disallow "r+r" or "r+r+i".
833  default:
834  return false;
835  }
836 
837  return true;
838 }
839 
840 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
841  return isInt<12>(Imm);
842 }
843 
844 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
845  return isInt<12>(Imm);
846 }
847 
848 // On RV32, 64-bit integers are split into their high and low parts and held
849 // in two different registers, so the trunc is free since the low register can
850 // just be used.
851 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
852  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
853  return false;
854  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
855  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
856  return (SrcBits == 64 && DestBits == 32);
857 }
858 
859 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
860  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
861  !SrcVT.isInteger() || !DstVT.isInteger())
862  return false;
863  unsigned SrcBits = SrcVT.getSizeInBits();
864  unsigned DestBits = DstVT.getSizeInBits();
865  return (SrcBits == 64 && DestBits == 32);
866 }
867 
868 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
869  // Zexts are free if they can be combined with a load.
870  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
871  EVT MemVT = LD->getMemoryVT();
872  if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
873  (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
874  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
875  LD->getExtensionType() == ISD::ZEXTLOAD))
876  return true;
877  }
878 
879  return TargetLowering::isZExtFree(Val, VT2);
880 }
881 
882 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
883  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
884 }
885 
886 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
887  return Subtarget.hasStdExtZbb();
888 }
889 
890 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
891  return Subtarget.hasStdExtZbb();
892 }
893 
894 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
895                                        bool ForCodeSize) const {
896  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
897  return false;
898  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
899  return false;
900  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
901  return false;
902  if (Imm.isNegZero())
903  return false;
904  return Imm.isZero();
905 }
906 
907 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
908  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
909  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
910  (VT == MVT::f64 && Subtarget.hasStdExtD());
911 }
912 
913 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
914                                                        CallingConv::ID CC,
915  EVT VT) const {
916  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
917  // end up using a GPR but that will be decided based on ABI.
918  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
919  return MVT::f32;
920 
921  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
922 }
923 
924 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
925                                                             CallingConv::ID CC,
926  EVT VT) const {
927  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
928  // end up using a GPR but that will be decided based on ABI.
929  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
930  return 1;
931 
932  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
933 }
934 
935 // Changes the condition code and swaps operands if necessary, so the SetCC
936 // operation matches one of the comparisons supported directly by branches
937 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
938 // with 1/-1.
939 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
940  ISD::CondCode &CC, SelectionDAG &DAG) {
941  // Convert X > -1 to X >= 0.
942  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
943  RHS = DAG.getConstant(0, DL, RHS.getValueType());
944  CC = ISD::SETGE;
945  return;
946  }
947  // Convert X < 1 to 0 >= X.
948  if (CC == ISD::SETLT && isOneConstant(RHS)) {
949  RHS = LHS;
950  LHS = DAG.getConstant(0, DL, RHS.getValueType());
951  CC = ISD::SETGE;
952  return;
953  }
954 
955  switch (CC) {
956  default:
957  break;
958  case ISD::SETGT:
959  case ISD::SETLE:
960  case ISD::SETUGT:
961  case ISD::SETULE:
962    CC = ISD::getSetCCSwappedOperands(CC);
963    std::swap(LHS, RHS);
964  break;
965  }
966 }
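// For example, (setcc a, b, setugt) becomes (setcc b, a, setult) and is then
// selected as BLTU b, a; (setcc a, -1, setgt) is first rewritten above to
// (setcc a, 0, setge), i.e. BGE a, zero.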
967 
968 // Return the RISC-V branch opcode that matches the given DAG integer
969 // condition code. The CondCode must be one of those supported by the RISC-V
970 // ISA (see translateSetCCForBranch).
971 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
972  switch (CC) {
973  default:
974  llvm_unreachable("Unsupported CondCode");
975  case ISD::SETEQ:
976  return RISCV::BEQ;
977  case ISD::SETNE:
978  return RISCV::BNE;
979  case ISD::SETLT:
980  return RISCV::BLT;
981  case ISD::SETGE:
982  return RISCV::BGE;
983  case ISD::SETULT:
984  return RISCV::BLTU;
985  case ISD::SETUGE:
986  return RISCV::BGEU;
987  }
988 }
989 
990 RISCVVLMUL RISCVTargetLowering::getLMUL(MVT VT) {
991  assert(VT.isScalableVector() && "Expecting a scalable vector type");
992  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
993  if (VT.getVectorElementType() == MVT::i1)
994  KnownSize *= 8;
995 
996  switch (KnownSize) {
997  default:
998  llvm_unreachable("Invalid LMUL.");
999  case 8:
1000  return RISCVVLMUL::LMUL_F8;
1001  case 16:
1002  return RISCVVLMUL::LMUL_F4;
1003  case 32:
1004  return RISCVVLMUL::LMUL_F2;
1005  case 64:
1006  return RISCVVLMUL::LMUL_1;
1007  case 128:
1008  return RISCVVLMUL::LMUL_2;
1009  case 256:
1010  return RISCVVLMUL::LMUL_4;
1011  case 512:
1012  return RISCVVLMUL::LMUL_8;
1013  }
1014 }
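// For example, nxv2i32 has a known minimum size of 64 bits and maps to LMUL_1,
// while nxv8i32 (256 bits) maps to LMUL_4. Mask types are scaled as if they
// had 8-bit elements, so nxv8i1 also maps to LMUL_1.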
1015 
1016 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVLMUL LMul) {
1017  switch (LMul) {
1018  default:
1019  llvm_unreachable("Invalid LMUL.");
1020  case RISCVVLMUL::LMUL_F8:
1021  case RISCVVLMUL::LMUL_F4:
1022  case RISCVVLMUL::LMUL_F2:
1023  case RISCVVLMUL::LMUL_1:
1024  return RISCV::VRRegClassID;
1025  case RISCVVLMUL::LMUL_2:
1026  return RISCV::VRM2RegClassID;
1027  case RISCVVLMUL::LMUL_4:
1028  return RISCV::VRM4RegClassID;
1029  case RISCVVLMUL::LMUL_8:
1030  return RISCV::VRM8RegClassID;
1031  }
1032 }
1033 
1034 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1035  RISCVVLMUL LMUL = getLMUL(VT);
1036  if (LMUL == RISCVVLMUL::LMUL_F8 || LMUL == RISCVVLMUL::LMUL_F4 ||
1037      LMUL == RISCVVLMUL::LMUL_F2 || LMUL == RISCVVLMUL::LMUL_1) {
1038    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1039  "Unexpected subreg numbering");
1040  return RISCV::sub_vrm1_0 + Index;
1041  }
1042  if (LMUL == RISCVVLMUL::LMUL_2) {
1043  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1044  "Unexpected subreg numbering");
1045  return RISCV::sub_vrm2_0 + Index;
1046  }
1047  if (LMUL == RISCVVLMUL::LMUL_4) {
1048  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1049  "Unexpected subreg numbering");
1050  return RISCV::sub_vrm4_0 + Index;
1051  }
1052  llvm_unreachable("Invalid vector type.");
1053 }
1054 
1055 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1056  if (VT.getVectorElementType() == MVT::i1)
1057  return RISCV::VRRegClassID;
1058  return getRegClassIDForLMUL(getLMUL(VT));
1059 }
1060 
1061 // Attempt to decompose a subvector insert/extract between VecVT and
1062 // SubVecVT via subregister indices. Returns the subregister index that
1063 // can perform the subvector insert/extract with the given element index, as
1064 // well as the index corresponding to any leftover subvectors that must be
1065 // further inserted/extracted within the register class for SubVecVT.
1066 std::pair<unsigned, unsigned>
1067 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1068     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1069  const RISCVRegisterInfo *TRI) {
1070  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1071  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1072  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1073  "Register classes not ordered");
1074  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1075  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1076  // Try to compose a subregister index that takes us from the incoming
1077  // LMUL>1 register class down to the outgoing one. At each step we half
1078  // the LMUL:
1079  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1080  // Note that this is not guaranteed to find a subregister index, such as
1081  // when we are extracting from one VR type to another.
1082  unsigned SubRegIdx = RISCV::NoSubRegister;
1083  for (const unsigned RCID :
1084  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1085  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1086  VecVT = VecVT.getHalfNumVectorElementsVT();
1087  bool IsHi =
1088  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1089  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1090  getSubregIndexByMVT(VecVT, IsHi));
1091  if (IsHi)
1092  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1093  }
1094  return {SubRegIdx, InsertExtractIdx};
1095 }
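// Working through the example above: nxv16i32 (VRM8) with index 12 first
// halves to nxv8i32 (upper half, sub_vrm4_1, index becomes 4), then to nxv4i32
// (upper half, sub_vrm2_1, index 0), then to nxv2i32 (lower half, sub_vrm1_0),
// leaving a leftover element index of 0.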
1096 
1097 // Return the largest legal scalable vector type that matches VT's element type.
1098 static MVT getContainerForFixedLengthVector(
1099     const TargetLowering &TLI, MVT VT, const RISCVSubtarget &Subtarget) {
1100  assert(VT.isFixedLengthVector() && TLI.isTypeLegal(VT) &&
1101  "Expected legal fixed length vector!");
1102 
1103  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
1104  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");
1105 
1106  MVT EltVT = VT.getVectorElementType();
1107  switch (EltVT.SimpleTy) {
1108  default:
1109  llvm_unreachable("unexpected element type for RVV container");
1110  case MVT::i1: {
1111  // Masks are calculated assuming 8-bit elements since that's when we need
1112  // the most elements.
1113  unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
1114  return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
1115  }
1116  case MVT::i8:
1117  case MVT::i16:
1118  case MVT::i32:
1119  case MVT::i64:
1120  case MVT::f16:
1121  case MVT::f32:
1122  case MVT::f64: {
1123  unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
1124  return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
1125  }
1126  }
1127 }
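// For example, if getLMULForFixedLengthVector returns 2 for v8i16, the
// container is nxv8i16: 64 / 16 = 4 elements per block, times LMUL 2. The
// exact LMUL chosen depends on the subtarget's fixed-length vector settings.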
1128 
1129 static MVT getContainerForFixedLengthVector(
1130     SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
1131   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1132                                           Subtarget);
1133 }
1134 
1135 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1136   return getContainerForFixedLengthVector(*this, VT, getSubtarget());
1137 }
1138 
1139 // Grow V to consume an entire RVV register.
1140 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
1141                                        const RISCVSubtarget &Subtarget) {
1142  assert(VT.isScalableVector() &&
1143  "Expected to convert into a scalable vector!");
1144  assert(V.getValueType().isFixedLengthVector() &&
1145         "Expected a fixed length vector operand!");
1146  SDLoc DL(V);
1147  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1148  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1149 }
1150 
1151 // Shrink V so it's just big enough to maintain a VT's worth of data.
1152 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
1153                                          const RISCVSubtarget &Subtarget) {
1154  assert(VT.isFixedLengthVector() &&
1155  "Expected to convert into a fixed length vector!");
1156  assert(V.getValueType().isScalableVector() &&
1157         "Expected a scalable vector operand!");
1158  SDLoc DL(V);
1159  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1160  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1161 }
1162 
1163 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1164 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1165 // the vector type that it is contained in.
1166 static std::pair<SDValue, SDValue>
1167 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1168  const RISCVSubtarget &Subtarget) {
1169  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1170  MVT XLenVT = Subtarget.getXLenVT();
1171  SDValue VL = VecVT.isFixedLengthVector()
1172  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1173  : DAG.getRegister(RISCV::X0, XLenVT);
1174  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1175  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1176  return {Mask, VL};
1177 }
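// For example, a fixed-length v4i32 held in an nxv2i32 container gets
// VL = constant 4 and Mask = (VMSET_VL nxv2i1, 4); a scalable type instead
// passes X0 as VL, which the vsetvli encoding treats as VLMAX.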
1178 
1179 // As above but assuming the given type is a scalable vector type.
1180 static std::pair<SDValue, SDValue>
1181 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1182                         const RISCVSubtarget &Subtarget) {
1183  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1184  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1185 }
1186 
1187 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1188 // of either is (currently) supported. This can get us into an infinite loop
1189 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1190 // as a ..., etc.
1191 // Until either (or both) of these can reliably lower any node, reporting that
1192 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1193 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1194 // which is not desirable.
1195 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1196     EVT VT, unsigned DefinedValues) const {
1197  return false;
1198 }
1199 
1200 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1201   // Only splats are currently supported.
1202  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1203  return true;
1204 
1205  return false;
1206 }
1207 
1208 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1209                                  const RISCVSubtarget &Subtarget) {
1210  MVT VT = Op.getSimpleValueType();
1211  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1212 
1213  MVT ContainerVT =
1214      getContainerForFixedLengthVector(DAG, VT, Subtarget);
1215 
1216  SDLoc DL(Op);
1217  SDValue Mask, VL;
1218  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1219 
1220  MVT XLenVT = Subtarget.getXLenVT();
1221  unsigned NumElts = Op.getNumOperands();
1222 
1223  if (VT.getVectorElementType() == MVT::i1) {
1224  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1225  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1226  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1227  }
1228 
1229  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1230  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1231  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1232  }
1233 
1234  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1235  // scalar integer chunks whose bit-width depends on the number of mask
1236  // bits and XLEN.
1237  // First, determine the most appropriate scalar integer type to use. This
1238  // is at most XLenVT, but may be shrunk to a smaller vector element type
1239  // according to the size of the final vector - use i8 chunks rather than
1240  // XLenVT if we're producing a v8i1. This results in more consistent
1241  // codegen across RV32 and RV64.
1242  // If we have to use more than one INSERT_VECTOR_ELT then this optimization
1243  // is likely to increase code size; avoid performing it in such a case.
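  // For example, the v8i1 constant <1,0,1,1,0,0,1,0> is built as the v1i8
  // constant 0x4d (element I in bit I) and then bitcast back to v8i1.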
1244  unsigned NumViaIntegerBits =
1245  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1246  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
1247  (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
1248  // Now we can create our integer vector type. Note that it may be larger
1249  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1250  MVT IntegerViaVecVT =
1251  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1252  divideCeil(NumElts, NumViaIntegerBits));
1253 
1254  uint64_t Bits = 0;
1255  unsigned BitPos = 0, IntegerEltIdx = 0;
1256  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1257 
1258  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1259  // Once we accumulate enough bits to fill our scalar type, insert into
1260  // our vector and clear our accumulated data.
1261  if (I != 0 && I % NumViaIntegerBits == 0) {
1262  if (NumViaIntegerBits <= 32)
1263  Bits = SignExtend64(Bits, 32);
1264  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1265  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1266  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1267  Bits = 0;
1268  BitPos = 0;
1269  IntegerEltIdx++;
1270  }
1271  SDValue V = Op.getOperand(I);
1272  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1273  Bits |= ((uint64_t)BitValue << BitPos);
1274  }
1275 
1276  // Insert the (remaining) scalar value into position in our integer
1277  // vector type.
1278  if (NumViaIntegerBits <= 32)
1279  Bits = SignExtend64(Bits, 32);
1280  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1281  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1282  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1283 
1284  if (NumElts < NumViaIntegerBits) {
1285  // If we're producing a smaller vector than our minimum legal integer
1286  // type, bitcast to the equivalent (known-legal) mask type, and extract
1287  // our final mask.
1288  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1289  Vec = DAG.getBitcast(MVT::v8i1, Vec);
1290  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1291  DAG.getConstant(0, DL, XLenVT));
1292  } else {
1293  // Else we must have produced an integer type with the same size as the
1294  // mask type; bitcast for the final result.
1295  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1296  Vec = DAG.getBitcast(VT, Vec);
1297  }
1298 
1299  return Vec;
1300  }
1301 
1302  return SDValue();
1303  }
1304 
1305  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1306  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1307                                      : RISCVISD::VMV_V_X_VL;
1308  Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1309  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1310  }
1311 
1312  // Try and match an index sequence, which we can lower directly to the vid
1313  // instruction. An all-undef vector is matched by getSplatValue, above.
1314  if (VT.isInteger()) {
1315  bool IsVID = true;
1316  for (unsigned I = 0; I < NumElts && IsVID; I++)
1317  IsVID &= Op.getOperand(I).isUndef() ||
1318  (isa<ConstantSDNode>(Op.getOperand(I)) &&
1319  Op.getConstantOperandVal(I) == I);
1320 
1321  if (IsVID) {
1322  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1323  return convertFromScalableVector(VT, VID, DAG, Subtarget);
1324  }
1325  }
1326 
1327  // Attempt to detect "hidden" splats, which only reveal themselves as splats
1328  // when re-interpreted as a vector with a larger element type. For example,
1329  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1330  // could be instead splat as
1331  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
1332  // TODO: This optimization could also work on non-constant splats, but it
1333  // would require bit-manipulation instructions to construct the splat value.
1334  SmallVector<SDValue> Sequence;
1335  unsigned EltBitSize = VT.getScalarSizeInBits();
1336  const auto *BV = cast<BuildVectorSDNode>(Op);
1337  if (VT.isInteger() && EltBitSize < 64 &&
1338      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
1339      BV->getRepeatedSequence(Sequence) &&
1340  (Sequence.size() * EltBitSize) <= 64) {
1341  unsigned SeqLen = Sequence.size();
1342  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1343  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1344  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1345  ViaIntVT == MVT::i64) &&
1346  "Unexpected sequence type");
1347 
1348  unsigned EltIdx = 0;
1349  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1350  uint64_t SplatValue = 0;
1351  // Construct the amalgamated value which can be splatted as this larger
1352  // vector type.
1353  for (const auto &SeqV : Sequence) {
1354  if (!SeqV.isUndef())
1355  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1356  << (EltIdx * EltBitSize));
1357  EltIdx++;
1358  }
1359 
1360  // On RV64, sign-extend from 32 to 64 bits where possible in order to
1361  // achieve better constant materializion.
1362  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1363  SplatValue = SignExtend64(SplatValue, 32);
1364 
1365  // Since we can't introduce illegal i64 types at this stage, we can only
1366  // perform an i64 splat on RV32 if it is its own sign-extended value. That
1367  // way we can use RVV instructions to splat.
1368  assert((ViaIntVT.bitsLE(XLenVT) ||
1369  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1370  "Unexpected bitcast sequence");
1371  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1372  SDValue ViaVL =
1373  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1374  MVT ViaContainerVT =
1375      getContainerForFixedLengthVector(DAG, ViaVecVT,
1376  Subtarget);
1377  SDValue Splat =
1378  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1379  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1380  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1381  return DAG.getBitcast(VT, Splat);
1382  }
1383  }
1384 
1385  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1386  // which constitute a large proportion of the elements. In such cases we can
1387  // splat a vector with the dominant element and make up the shortfall with
1388  // INSERT_VECTOR_ELTs.
1389  // Note that this includes vectors of 2 elements by association. The
1390  // upper-most element is the "dominant" one, allowing us to use a splat to
1391  // "insert" the upper element, and an insert of the lower element at position
1392  // 0, which improves codegen.
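  // For example, v4i32 <42, 42, 42, 7> splats 42 (3 occurrences > threshold
  // of 2) and then inserts the single 7 at index 3 with INSERT_VECTOR_ELT.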
1393  SDValue DominantValue;
1394  unsigned MostCommonCount = 0;
1395  DenseMap<SDValue, unsigned> ValueCounts;
1396  unsigned NumUndefElts =
1397  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1398 
1399  for (SDValue V : Op->op_values()) {
1400  if (V.isUndef())
1401  continue;
1402 
1403  ValueCounts.insert(std::make_pair(V, 0));
1404  unsigned &Count = ValueCounts[V];
1405 
1406  // Is this value dominant? In case of a tie, prefer the highest element as
1407  // it's cheaper to insert near the beginning of a vector than it is at the
1408  // end.
1409  if (++Count >= MostCommonCount) {
1410  DominantValue = V;
1411  MostCommonCount = Count;
1412  }
1413  }
1414 
1415  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1416  unsigned NumDefElts = NumElts - NumUndefElts;
1417  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1418 
1419  // Don't perform this optimization when optimizing for size, since
1420  // materializing elements and inserting them tends to cause code bloat.
1421  if (!DAG.shouldOptForSize() &&
1422  ((MostCommonCount > DominantValueCountThreshold) ||
1423  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1424  // Start by splatting the most common element.
1425  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1426 
1427  DenseSet<SDValue> Processed{DominantValue};
1428  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1429  for (const auto &OpIdx : enumerate(Op->ops())) {
1430  const SDValue &V = OpIdx.value();
1431  if (V.isUndef() || !Processed.insert(V).second)
1432  continue;
1433  if (ValueCounts[V] == 1) {
1434  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1435  DAG.getConstant(OpIdx.index(), DL, XLenVT));
1436  } else {
1437  // Blend in all instances of this value using a VSELECT, using a
1438  // mask where each bit signals whether that element is the one
1439  // we're after.
1440  SmallVector<SDValue> Ops;
1441  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1442  return DAG.getConstant(V == V1, DL, XLenVT);
1443  });
1444  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1445  DAG.getBuildVector(SelMaskTy, DL, Ops),
1446  DAG.getSplatBuildVector(VT, DL, V), Vec);
1447  }
1448  }
1449 
1450  return Vec;
1451  }
1452 
1453  return SDValue();
1454 }
1455 
1456 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
1457                                    const RISCVSubtarget &Subtarget) {
1458  SDValue V1 = Op.getOperand(0);
1459  SDValue V2 = Op.getOperand(1);
1460  SDLoc DL(Op);
1461  MVT XLenVT = Subtarget.getXLenVT();
1462  MVT VT = Op.getSimpleValueType();
1463  unsigned NumElts = VT.getVectorNumElements();
1464  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1465 
1466  if (SVN->isSplat()) {
1467  int Lane = SVN->getSplatIndex();
1468  if (Lane >= 0) {
1469      MVT ContainerVT = getContainerForFixedLengthVector(
1470          DAG, VT, Subtarget);
1471 
1472  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
1473  assert(Lane < (int)NumElts && "Unexpected lane!");
1474 
1475  SDValue Mask, VL;
1476  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1477  SDValue Gather =
1478  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
1479  DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
1480  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1481  }
1482  }
1483 
1484  // Detect shuffles which can be re-expressed as vector selects.
1485  SmallVector<SDValue> MaskVals;
1486  // By default we preserve the original operand order, and select LHS as true
1487  // and RHS as false. However, since RVV vector selects may feature splats but
1488  // only on the LHS, we may choose to invert our mask and instead select
1489  // between RHS and LHS.
1490  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
1491 
1492  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
1493  int MaskIndex = MaskIdx.value();
1494  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
1495  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
1496  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
1497  });
1498 
1499  if (IsSelect) {
1500  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
1501  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
1502  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
1503  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SwapOps ? V2 : V1,
1504  SwapOps ? V1 : V2);
1505  }
1506 
1507  return SDValue();
1508 }
1509 
1510 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
1511  SDLoc DL, SelectionDAG &DAG,
1512  const RISCVSubtarget &Subtarget) {
1513  if (VT.isScalableVector())
1514  return DAG.getFPExtendOrRound(Op, DL, VT);
1515  assert(VT.isFixedLengthVector() &&
1516  "Unexpected value type for RVV FP extend/round lowering");
1517  SDValue Mask, VL;
1518  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1519  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
1520                        ? RISCVISD::FP_EXTEND_VL
1521                        : RISCVISD::FP_ROUND_VL;
1522  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
1523 }
1524 
1525 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
1526                                             SelectionDAG &DAG) const {
1527  switch (Op.getOpcode()) {
1528  default:
1529  report_fatal_error("unimplemented operand");
1530  case ISD::GlobalAddress:
1531  return lowerGlobalAddress(Op, DAG);
1532  case ISD::BlockAddress:
1533  return lowerBlockAddress(Op, DAG);
1534  case ISD::ConstantPool:
1535  return lowerConstantPool(Op, DAG);
1536  case ISD::JumpTable:
1537  return lowerJumpTable(Op, DAG);
1538  case ISD::GlobalTLSAddress:
1539  return lowerGlobalTLSAddress(Op, DAG);
1540  case ISD::SELECT:
1541  return lowerSELECT(Op, DAG);
1542  case ISD::BRCOND:
1543  return lowerBRCOND(Op, DAG);
1544  case ISD::VASTART:
1545  return lowerVASTART(Op, DAG);
1546  case ISD::FRAMEADDR:
1547  return lowerFRAMEADDR(Op, DAG);
1548  case ISD::RETURNADDR:
1549  return lowerRETURNADDR(Op, DAG);
1550  case ISD::SHL_PARTS:
1551  return lowerShiftLeftParts(Op, DAG);
1552  case ISD::SRA_PARTS:
1553  return lowerShiftRightParts(Op, DAG, true);
1554  case ISD::SRL_PARTS:
1555  return lowerShiftRightParts(Op, DAG, false);
1556  case ISD::BITCAST: {
1557  SDLoc DL(Op);
1558  EVT VT = Op.getValueType();
1559  SDValue Op0 = Op.getOperand(0);
1560  EVT Op0VT = Op0.getValueType();
1561  MVT XLenVT = Subtarget.getXLenVT();
1562  if (VT.isFixedLengthVector()) {
1563  // We can handle fixed length vector bitcasts with a simple replacement
1564  // in isel.
1565  if (Op0VT.isFixedLengthVector())
1566  return Op;
1567  // When bitcasting from scalar to fixed-length vector, insert the scalar
1568  // into a one-element vector of the result type, and perform a vector
1569  // bitcast.
1570  if (!Op0VT.isVector()) {
1571  auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
1572  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
1573  DAG.getUNDEF(BVT), Op0,
1574  DAG.getConstant(0, DL, XLenVT)));
1575  }
1576  return SDValue();
1577  }
1578  // Custom-legalize bitcasts from fixed-length vector types to scalar types
1579  // thus: bitcast the vector to a one-element vector type whose element type
1580  // is the same as the result type, and extract the first element.
1581  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
1582  LLVMContext &Context = *DAG.getContext();
1583  SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
1584  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
1585  DAG.getConstant(0, DL, XLenVT));
1586  }
1587  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
1588  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
1589  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
1590  return FPConv;
1591  }
1592  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
1593  Subtarget.hasStdExtF()) {
1594  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1595  SDValue FPConv =
1596  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
1597  return FPConv;
1598  }
1599  return SDValue();
1600  }
1601  case ISD::INTRINSIC_WO_CHAIN:
1602  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
1603  case ISD::INTRINSIC_W_CHAIN:
1604  return LowerINTRINSIC_W_CHAIN(Op, DAG);
1605  case ISD::BSWAP:
1606  case ISD::BITREVERSE: {
1607  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
1608  assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1609  MVT VT = Op.getSimpleValueType();
1610  SDLoc DL(Op);
1611  // Start with the maximum immediate value which is the bitwidth - 1.
1612  unsigned Imm = VT.getSizeInBits() - 1;
1613  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
1614  if (Op.getOpcode() == ISD::BSWAP)
1615  Imm &= ~0x7U;
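    // For example, for i32 BITREVERSE Imm stays 31 (reverse every bit), while
    // for i32 BSWAP it becomes 31 & ~7 = 24, the byte-swapping GREVI control.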
1616  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
1617  DAG.getConstant(Imm, DL, VT));
1618  }
1619  case ISD::FSHL:
1620  case ISD::FSHR: {
1621  MVT VT = Op.getSimpleValueType();
1622  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
1623  SDLoc DL(Op);
1624  if (Op.getOperand(2).getOpcode() == ISD::Constant)
1625  return Op;
1626  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
1627  // use log(XLen) bits. Mask the shift amount accordingly.
1628  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
1629  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
1630  DAG.getConstant(ShAmtWidth, DL, VT));
1631  unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
1632  return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
1633  }
1634  case ISD::TRUNCATE: {
1635  SDLoc DL(Op);
1636  MVT VT = Op.getSimpleValueType();
1637  // Only custom-lower vector truncates
1638  if (!VT.isVector())
1639  return Op;
1640 
1641  // Truncates to mask types are handled differently
1642  if (VT.getVectorElementType() == MVT::i1)
1643  return lowerVectorMaskTrunc(Op, DAG);
1644 
1645  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
1646  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
1647  // truncate by one power of two at a time.
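    // For example, a v4i32 -> v4i8 truncate is emitted as two such nodes:
    // i32 -> i16, then i16 -> i8.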
1648  MVT DstEltVT = VT.getVectorElementType();
1649 
1650  SDValue Src = Op.getOperand(0);
1651  MVT SrcVT = Src.getSimpleValueType();
1652  MVT SrcEltVT = SrcVT.getVectorElementType();
1653 
1654  assert(DstEltVT.bitsLT(SrcEltVT) &&
1655  isPowerOf2_64(DstEltVT.getSizeInBits()) &&
1656  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
1657  "Unexpected vector truncate lowering");
1658 
1659  MVT ContainerVT = SrcVT;
1660  if (SrcVT.isFixedLengthVector()) {
1661  ContainerVT = getContainerForFixedLengthVector(SrcVT);
1662  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1663  }
1664 
1665  SDValue Result = Src;
1666  SDValue Mask, VL;
1667  std::tie(Mask, VL) =
1668  getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
1669  LLVMContext &Context = *DAG.getContext();
1670  const ElementCount Count = ContainerVT.getVectorElementCount();
1671  do {
1672  SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
1673  EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
1674  Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
1675  Mask, VL);
1676  } while (SrcEltVT != DstEltVT);
1677 
1678  if (SrcVT.isFixedLengthVector())
1679  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
1680 
1681  return Result;
1682  }
1683  case ISD::ANY_EXTEND:
1684  case ISD::ZERO_EXTEND:
1685  if (Op.getOperand(0).getValueType().isVector() &&
1686  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1687  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
1688  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
1689  case ISD::SIGN_EXTEND:
1690  if (Op.getOperand(0).getValueType().isVector() &&
1691  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1692  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
1693  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
1694  case ISD::SPLAT_VECTOR_PARTS:
1695  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
1696  case ISD::INSERT_VECTOR_ELT:
1697  return lowerINSERT_VECTOR_ELT(Op, DAG);
1698  case ISD::EXTRACT_VECTOR_ELT:
1699  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1700  case ISD::VSCALE: {
1701  MVT VT = Op.getSimpleValueType();
1702  SDLoc DL(Op);
1703  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
1704  // We define our scalable vector types for lmul=1 to use a 64 bit known
1705  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
1706  // vscale as VLENB / 8.
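    // For example, with VLEN = 128 the vlenb CSR reads 16, so vscale is
    // 16 >> 3 = 2 and <vscale x 2 x i32> holds 4 elements.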
1707  assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
1708  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
1709  DAG.getConstant(3, DL, VT));
1710  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
1711  }
1712  case ISD::FP_EXTEND: {
1713  // RVV can only do fp_extend to types double the size as the source. We
1714  // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
1715  // via f32.
1716  SDLoc DL(Op);
1717  MVT VT = Op.getSimpleValueType();
1718  SDValue Src = Op.getOperand(0);
1719  MVT SrcVT = Src.getSimpleValueType();
1720 
1721  // Prepare any fixed-length vector operands.
1722  MVT ContainerVT = VT;
1723  if (SrcVT.isFixedLengthVector()) {
1724  ContainerVT = getContainerForFixedLengthVector(VT);
1725  MVT SrcContainerVT =
1726  ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
1727  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1728  }
1729 
1730  if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
1731  SrcVT.getVectorElementType() != MVT::f16) {
1732  // For scalable vectors, we only need to close the gap between
1733  // vXf16->vXf64.
1734  if (!VT.isFixedLengthVector())
1735  return Op;
1736  // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
1737  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1738  return convertFromScalableVector(VT, Src, DAG, Subtarget);
1739  }
1740 
1741  MVT InterVT = VT.changeVectorElementType(MVT::f32);
1742  MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
1743  SDValue IntermediateExtend = getRVVFPExtendOrRound(
1744  Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
1745 
1746  SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
1747  DL, DAG, Subtarget);
1748  if (VT.isFixedLengthVector())
1749  return convertFromScalableVector(VT, Extend, DAG, Subtarget);
1750  return Extend;
1751  }
1752  case ISD::FP_ROUND: {
1753  // RVV can only do fp_round to types half the size as the source. We
1754  // custom-lower f64->f16 rounds via RVV's round-to-odd float
1755  // conversion instruction.
1756  SDLoc DL(Op);
1757  MVT VT = Op.getSimpleValueType();
1758  SDValue Src = Op.getOperand(0);
1759  MVT SrcVT = Src.getSimpleValueType();
1760 
1761  // Prepare any fixed-length vector operands.
1762  MVT ContainerVT = VT;
1763  if (VT.isFixedLengthVector()) {
1764  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1765  ContainerVT =
1766  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1767  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1768  }
1769 
1770  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
1771  SrcVT.getVectorElementType() != MVT::f64) {
1772  // For scalable vectors, we only need to close the gap between
1773  // vXf64<->vXf16.
1774  if (!VT.isFixedLengthVector())
1775  return Op;
1776  // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
1777  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
1778  return convertFromScalableVector(VT, Src, DAG, Subtarget);
1779  }
1780 
1781  SDValue Mask, VL;
1782  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1783 
1784  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
1785  SDValue IntermediateRound =
1786  DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
1787  SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
1788  DL, DAG, Subtarget);
1789 
1790  if (VT.isFixedLengthVector())
1791  return convertFromScalableVector(VT, Round, DAG, Subtarget);
1792  return Round;
1793  }
1794  case ISD::FP_TO_SINT:
1795  case ISD::FP_TO_UINT:
1796  case ISD::SINT_TO_FP:
1797  case ISD::UINT_TO_FP: {
1798  // RVV can only do fp<->int conversions to types half/double the size as
1799  // the source. We custom-lower any conversions that do two hops into
1800  // sequences.
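    // For example, v4i8 -> v4f32 (a 4x widening) becomes a sign/zero extend to
    // v4i32 followed by one int->fp conversion, and v4f32 -> v4i8 becomes a
    // narrowing fp->int to v4i16 followed by an integer truncate.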
1801  MVT VT = Op.getSimpleValueType();
1802  if (!VT.isVector())
1803  return Op;
1804  SDLoc DL(Op);
1805  SDValue Src = Op.getOperand(0);
1806  MVT EltVT = VT.getVectorElementType();
1807  MVT SrcVT = Src.getSimpleValueType();
1808  MVT SrcEltVT = SrcVT.getVectorElementType();
1809  unsigned EltSize = EltVT.getSizeInBits();
1810  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
1811  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
1812  "Unexpected vector element types");
1813 
1814  bool IsInt2FP = SrcEltVT.isInteger();
1815  // Widening conversions
1816  if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
1817  if (IsInt2FP) {
1818  // Do a regular integer sign/zero extension then convert to float.
1819        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
1820                                      VT.getVectorElementCount());
1821        unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
1822                                 ? ISD::ZERO_EXTEND
1823                                 : ISD::SIGN_EXTEND;
1824  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
1825  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
1826  }
1827  // FP2Int
1828  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
1829  // Do one doubling fp_extend then complete the operation by converting
1830  // to int.
1831  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1832  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
1833  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
1834  }
1835 
1836  // Narrowing conversions
1837  if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
1838  if (IsInt2FP) {
1839  // One narrowing int_to_fp, then an fp_round.
1840  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
1841  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1842  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
1843  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
1844  }
1845  // FP2Int
1846  // One narrowing fp_to_int, then truncate the integer. If the float isn't
1847  // representable by the integer, the result is poison.
1848      MVT IVecVT =
1849          MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
1850                           VT.getVectorElementCount());
1851  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
1852  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
1853  }
1854 
1855  // Scalable vectors can exit here. Patterns will handle equally-sized
1856  // conversions halving/doubling ones.
1857  if (!VT.isFixedLengthVector())
1858  return Op;
1859 
1860  // For fixed-length vectors we lower to a custom "VL" node.
1861  unsigned RVVOpc = 0;
1862  switch (Op.getOpcode()) {
1863  default:
1864  llvm_unreachable("Impossible opcode");
1865  case ISD::FP_TO_SINT:
1866  RVVOpc = RISCVISD::FP_TO_SINT_VL;
1867  break;
1868  case ISD::FP_TO_UINT:
1869  RVVOpc = RISCVISD::FP_TO_UINT_VL;
1870  break;
1871  case ISD::SINT_TO_FP:
1872  RVVOpc = RISCVISD::SINT_TO_FP_VL;
1873  break;
1874  case ISD::UINT_TO_FP:
1875  RVVOpc = RISCVISD::UINT_TO_FP_VL;
1876  break;
1877  }
1878 
1879  MVT ContainerVT, SrcContainerVT;
1880  // Derive the reference container type from the larger vector type.
1881  if (SrcEltSize > EltSize) {
1882  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
1883  ContainerVT =
1884  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
1885  } else {
1886  ContainerVT = getContainerForFixedLengthVector(VT);
1887  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
1888  }
1889 
1890  SDValue Mask, VL;
1891  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1892 
1893  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
1894  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
1895  return convertFromScalableVector(VT, Src, DAG, Subtarget);
1896  }
1897  case ISD::VECREDUCE_ADD:
1898  case ISD::VECREDUCE_UMAX:
1899  case ISD::VECREDUCE_SMAX:
1900  case ISD::VECREDUCE_UMIN:
1901  case ISD::VECREDUCE_SMIN:
1902  return lowerVECREDUCE(Op, DAG);
1903  case ISD::VECREDUCE_AND:
1904  case ISD::VECREDUCE_OR:
1905  case ISD::VECREDUCE_XOR:
1906  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
1907  return lowerVectorMaskVECREDUCE(Op, DAG);
1908  return lowerVECREDUCE(Op, DAG);
1909  case ISD::VECREDUCE_FADD:
1910  case ISD::VECREDUCE_SEQ_FADD:
1911  return lowerFPVECREDUCE(Op, DAG);
1912  case ISD::INSERT_SUBVECTOR:
1913  return lowerINSERT_SUBVECTOR(Op, DAG);
1914  case ISD::EXTRACT_SUBVECTOR:
1915  return lowerEXTRACT_SUBVECTOR(Op, DAG);
1916  case ISD::STEP_VECTOR:
1917  return lowerSTEP_VECTOR(Op, DAG);
1918  case ISD::VECTOR_REVERSE:
1919  return lowerVECTOR_REVERSE(Op, DAG);
1920  case ISD::BUILD_VECTOR:
1921  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
1922  case ISD::VECTOR_SHUFFLE:
1923  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
1924  case ISD::CONCAT_VECTORS: {
1925  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
1926  // better than going through the stack, as the default expansion does.
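    // For example, (concat_vectors v2i32:a, v2i32:b) becomes two
    // INSERT_SUBVECTORs of a and b into a v4i32 undef at indices 0 and 2.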
1927  SDLoc DL(Op);
1928  MVT VT = Op.getSimpleValueType();
1929  unsigned NumOpElts =
1930  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
1931  SDValue Vec = DAG.getUNDEF(VT);
1932  for (const auto &OpIdx : enumerate(Op->ops()))
1933  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
1934  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
1935  return Vec;
1936  }
1937  case ISD::LOAD:
1938  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
1939  case ISD::STORE:
1940  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
1941  case ISD::MLOAD:
1942  return lowerMLOAD(Op, DAG);
1943  case ISD::MSTORE:
1944  return lowerMSTORE(Op, DAG);
1945  case ISD::SETCC:
1946  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
1947  case ISD::ADD:
1948  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
1949  case ISD::SUB:
1950  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
1951  case ISD::MUL:
1952  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
1953  case ISD::MULHS:
1954  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
1955  case ISD::MULHU:
1956  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
1957  case ISD::AND:
1958  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
1959                                            RISCVISD::AND_VL);
1960  case ISD::OR:
1961  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
1962  RISCVISD::OR_VL);
1963  case ISD::XOR:
 1964  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
 1965  RISCVISD::XOR_VL);
1966  case ISD::SDIV:
1967  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
1968  case ISD::SREM:
1969  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
1970  case ISD::UDIV:
1971  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
1972  case ISD::UREM:
1973  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
1974  case ISD::SHL:
1975  return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
1976  case ISD::SRA:
1977  return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
1978  case ISD::SRL:
1979  return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
1980  case ISD::FADD:
1981  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
1982  case ISD::FSUB:
1983  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
1984  case ISD::FMUL:
1985  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
1986  case ISD::FDIV:
1987  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
1988  case ISD::FNEG:
1989  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
1990  case ISD::FABS:
1991  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
1992  case ISD::FSQRT:
1993  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
1994  case ISD::FMA:
1995  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
1996  case ISD::SMIN:
1997  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
1998  case ISD::SMAX:
1999  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2000  case ISD::UMIN:
2001  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2002  case ISD::UMAX:
2003  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2004  case ISD::ABS:
2005  return lowerABS(Op, DAG);
2006  case ISD::VSELECT:
2007  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2008  case ISD::FCOPYSIGN:
2009  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2010  case ISD::MGATHER:
2011  return lowerMGATHER(Op, DAG);
2012  case ISD::MSCATTER:
2013  return lowerMSCATTER(Op, DAG);
2014  }
2015 }
2016 
 2017 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
 2018  SelectionDAG &DAG, unsigned Flags) {
2019  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2020 }
2021 
 2022 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
 2023  SelectionDAG &DAG, unsigned Flags) {
2024  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2025  Flags);
2026 }
2027 
 2028 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
 2029  SelectionDAG &DAG, unsigned Flags) {
2030  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2031  N->getOffset(), Flags);
2032 }
2033 
 2034 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
 2035  SelectionDAG &DAG, unsigned Flags) {
2036  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2037 }
2038 
2039 template <class NodeTy>
2040 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2041  bool IsLocal) const {
2042  SDLoc DL(N);
2043  EVT Ty = getPointerTy(DAG.getDataLayout());
2044 
2045  if (isPositionIndependent()) {
2046  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2047  if (IsLocal)
2048  // Use PC-relative addressing to access the symbol. This generates the
2049  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2050  // %pcrel_lo(auipc)).
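 // As a rough illustration (the register choice is hypothetical), this
 // typically assembles to:
 //   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
 //                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)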
2051  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2052 
2053  // Use PC-relative addressing to access the GOT for this symbol, then load
2054  // the address from the GOT. This generates the pattern (PseudoLA sym),
2055  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2056  return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2057  }
2058 
2059  switch (getTargetMachine().getCodeModel()) {
2060  default:
2061  report_fatal_error("Unsupported code model for lowering");
2062  case CodeModel::Small: {
2063  // Generate a sequence for accessing addresses within the first 2 GiB of
2064  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
2065  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2066  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2067  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2068  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2069  }
2070  case CodeModel::Medium: {
2071  // Generate a sequence for accessing addresses within any 2GiB range within
2072  // the address space. This generates the pattern (PseudoLLA sym), which
2073  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
2074  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2075  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2076  }
2077  }
2078 }
2079 
2080 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2081  SelectionDAG &DAG) const {
2082  SDLoc DL(Op);
2083  EVT Ty = Op.getValueType();
2084  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2085  int64_t Offset = N->getOffset();
2086  MVT XLenVT = Subtarget.getXLenVT();
2087 
2088  const GlobalValue *GV = N->getGlobal();
2089  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2090  SDValue Addr = getAddr(N, DAG, IsLocal);
2091 
2092  // In order to maximise the opportunity for common subexpression elimination,
2093  // emit a separate ADD node for the global address offset instead of folding
2094  // it in the global address node. Later peephole optimisations may choose to
2095  // fold it back in when profitable.
2096  if (Offset != 0)
2097  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2098  DAG.getConstant(Offset, DL, XLenVT));
2099  return Addr;
2100 }
2101 
2102 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2103  SelectionDAG &DAG) const {
2104  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2105 
2106  return getAddr(N, DAG);
2107 }
2108 
2109 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2110  SelectionDAG &DAG) const {
2111  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2112 
2113  return getAddr(N, DAG);
2114 }
2115 
2116 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2117  SelectionDAG &DAG) const {
2118  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2119 
2120  return getAddr(N, DAG);
2121 }
2122 
2123 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2124  SelectionDAG &DAG,
2125  bool UseGOT) const {
2126  SDLoc DL(N);
2127  EVT Ty = getPointerTy(DAG.getDataLayout());
2128  const GlobalValue *GV = N->getGlobal();
2129  MVT XLenVT = Subtarget.getXLenVT();
2130 
2131  if (UseGOT) {
2132  // Use PC-relative addressing to access the GOT for this TLS symbol, then
2133  // load the address from the GOT and add the thread pointer. This generates
2134  // the pattern (PseudoLA_TLS_IE sym), which expands to
2135  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
2136  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2137  SDValue Load =
2138  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
2139 
2140  // Add the thread pointer.
2141  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2142  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
2143  }
2144 
2145  // Generate a sequence for accessing the address relative to the thread
2146  // pointer, with the appropriate adjustment for the thread pointer offset.
2147  // This generates the pattern
2148  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
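 // As an assembly-level sketch (registers are illustrative only), this is:
 //   lui  a0, %tprel_hi(sym)
 //   add  a0, a0, tp, %tprel_add(sym)
 //   addi a0, a0, %tprel_lo(sym)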
 2149  SDValue AddrHi =
 2150  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
 2151  SDValue AddrAdd =
 2152  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
 2153  SDValue AddrLo =
 2154  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
2155 
2156  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2157  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2158  SDValue MNAdd = SDValue(
2159  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
2160  0);
2161  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
2162 }
2163 
2164 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2165  SelectionDAG &DAG) const {
2166  SDLoc DL(N);
2167  EVT Ty = getPointerTy(DAG.getDataLayout());
2168  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2169  const GlobalValue *GV = N->getGlobal();
2170 
2171  // Use a PC-relative addressing mode to access the global dynamic GOT address.
2172  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
2173  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
2174  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2175  SDValue Load =
2176  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
2177 
2178  // Prepare argument list to generate call.
2179  ArgListTy Args;
2180  ArgListEntry Entry;
2181  Entry.Node = Load;
2182  Entry.Ty = CallTy;
2183  Args.push_back(Entry);
2184 
2185  // Setup call to __tls_get_addr.
 2186  TargetLowering::CallLoweringInfo CLI(DAG);
 2187  CLI.setDebugLoc(DL)
2188  .setChain(DAG.getEntryNode())
2189  .setLibCallee(CallingConv::C, CallTy,
2190  DAG.getExternalSymbol("__tls_get_addr", Ty),
2191  std::move(Args));
2192 
2193  return LowerCallTo(CLI).first;
2194 }
2195 
2196 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2197  SelectionDAG &DAG) const {
2198  SDLoc DL(Op);
2199  EVT Ty = Op.getValueType();
2200  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2201  int64_t Offset = N->getOffset();
2202  MVT XLenVT = Subtarget.getXLenVT();
2203 
 2204  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
 2205 
 2206  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
 2207  CallingConv::GHC)
 2208  report_fatal_error("In GHC calling convention TLS is not supported");
2209 
2210  SDValue Addr;
2211  switch (Model) {
2212  case TLSModel::LocalExec:
2213  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
2214  break;
2215  case TLSModel::InitialExec:
2216  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
2217  break;
 2218  case TLSModel::LocalDynamic:
 2219  case TLSModel::GeneralDynamic:
 2220  Addr = getDynamicTLSAddr(N, DAG);
2221  break;
2222  }
2223 
2224  // In order to maximise the opportunity for common subexpression elimination,
2225  // emit a separate ADD node for the global address offset instead of folding
2226  // it in the global address node. Later peephole optimisations may choose to
2227  // fold it back in when profitable.
2228  if (Offset != 0)
2229  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2230  DAG.getConstant(Offset, DL, XLenVT));
2231  return Addr;
2232 }
2233 
2234 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2235  SDValue CondV = Op.getOperand(0);
2236  SDValue TrueV = Op.getOperand(1);
2237  SDValue FalseV = Op.getOperand(2);
2238  SDLoc DL(Op);
2239  MVT XLenVT = Subtarget.getXLenVT();
2240 
2241  // If the result type is XLenVT and CondV is the output of a SETCC node
2242  // which also operated on XLenVT inputs, then merge the SETCC node into the
2243  // lowered RISCVISD::SELECT_CC to take advantage of the integer
2244  // compare+branch instructions. i.e.:
2245  // (select (setcc lhs, rhs, cc), truev, falsev)
2246  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
2247  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
2248  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
2249  SDValue LHS = CondV.getOperand(0);
2250  SDValue RHS = CondV.getOperand(1);
2251  auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
2252  ISD::CondCode CCVal = CC->get();
2253 
 2254  // Special case for a select of 2 constants that have a difference of 1.
2255  // Normally this is done by DAGCombine, but if the select is introduced by
2256  // type legalization or op legalization, we miss it. Restricting to SETLT
2257  // case for now because that is what signed saturating add/sub need.
2258  // FIXME: We don't need the condition to be SETLT or even a SETCC,
2259  // but we would probably want to swap the true/false values if the condition
2260  // is SETGE/SETLE to avoid an XORI.
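 // As a sketch of the transform below: (select (setlt a, b), 5, 4) becomes
 // (add (setlt a, b), 4), relying on the setcc result being 0 or 1.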
2261  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
2262  CCVal == ISD::SETLT) {
2263  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
2264  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
2265  if (TrueVal - 1 == FalseVal)
2266  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
2267  if (TrueVal + 1 == FalseVal)
2268  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
2269  }
2270 
2271  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2272 
2273  SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
2274  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
2275  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2276  }
2277 
2278  // Otherwise:
2279  // (select condv, truev, falsev)
2280  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
2281  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
2282  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
2283 
2284  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
2285 
2286  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
2287 }
2288 
2289 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
2290  SDValue CondV = Op.getOperand(1);
2291  SDLoc DL(Op);
2292  MVT XLenVT = Subtarget.getXLenVT();
2293 
2294  if (CondV.getOpcode() == ISD::SETCC &&
2295  CondV.getOperand(0).getValueType() == XLenVT) {
2296  SDValue LHS = CondV.getOperand(0);
2297  SDValue RHS = CondV.getOperand(1);
2298  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
2299 
2300  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
2301 
2302  SDValue TargetCC = DAG.getCondCode(CCVal);
2303  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2304  LHS, RHS, TargetCC, Op.getOperand(2));
2305  }
2306 
2307  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
2308  CondV, DAG.getConstant(0, DL, XLenVT),
2309  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
2310 }
2311 
2312 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2313  MachineFunction &MF = DAG.getMachineFunction();
 2314  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
 2315 
2316  SDLoc DL(Op);
2317  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2318  getPointerTy(MF.getDataLayout()));
2319 
2320  // vastart just stores the address of the VarArgsFrameIndex slot into the
2321  // memory location argument.
2322  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2323  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2324  MachinePointerInfo(SV));
2325 }
2326 
2327 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
2328  SelectionDAG &DAG) const {
2329  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2330  MachineFunction &MF = DAG.getMachineFunction();
2331  MachineFrameInfo &MFI = MF.getFrameInfo();
2332  MFI.setFrameAddressIsTaken(true);
2333  Register FrameReg = RI.getFrameRegister(MF);
2334  int XLenInBytes = Subtarget.getXLen() / 8;
2335 
2336  EVT VT = Op.getValueType();
2337  SDLoc DL(Op);
2338  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2339  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2340  while (Depth--) {
2341  int Offset = -(XLenInBytes * 2);
2342  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2343  DAG.getIntPtrConstant(Offset, DL));
2344  FrameAddr =
2345  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2346  }
2347  return FrameAddr;
2348 }
2349 
2350 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
2351  SelectionDAG &DAG) const {
2352  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
2353  MachineFunction &MF = DAG.getMachineFunction();
2354  MachineFrameInfo &MFI = MF.getFrameInfo();
2355  MFI.setReturnAddressIsTaken(true);
2356  MVT XLenVT = Subtarget.getXLenVT();
2357  int XLenInBytes = Subtarget.getXLen() / 8;
2358 
 2359  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
 2360  return SDValue();
2361 
2362  EVT VT = Op.getValueType();
2363  SDLoc DL(Op);
2364  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2365  if (Depth) {
2366  int Off = -XLenInBytes;
2367  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
2368  SDValue Offset = DAG.getConstant(Off, DL, VT);
2369  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
2370  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
2371  MachinePointerInfo());
2372  }
2373 
2374  // Return the value of the return address register, marking it an implicit
2375  // live-in.
2376  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
2377  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
2378 }
2379 
2380 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
2381  SelectionDAG &DAG) const {
2382  SDLoc DL(Op);
2383  SDValue Lo = Op.getOperand(0);
2384  SDValue Hi = Op.getOperand(1);
2385  SDValue Shamt = Op.getOperand(2);
2386  EVT VT = Lo.getValueType();
2387 
2388  // if Shamt-XLEN < 0: // Shamt < XLEN
2389  // Lo = Lo << Shamt
2390  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
2391  // else:
2392  // Lo = 0
2393  // Hi = Lo << (Shamt-XLEN)
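 // Worked sketch with XLEN=32: Shamt=40 takes the 'else' branch, giving
 // Lo = 0 and Hi = Lo << 8; Shamt=4 takes the first branch, giving
 // Lo = Lo << 4 and Hi = (Hi << 4) | (Lo >> 28). Shamt is only known at
 // runtime, hence the selects emitted below.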
2394 
2395  SDValue Zero = DAG.getConstant(0, DL, VT);
2396  SDValue One = DAG.getConstant(1, DL, VT);
2397  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2398  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2399  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2400  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2401 
2402  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2403  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2404  SDValue ShiftRightLo =
2405  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
2406  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2407  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2408  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
2409 
2410  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2411 
2412  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2413  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2414 
2415  SDValue Parts[2] = {Lo, Hi};
2416  return DAG.getMergeValues(Parts, DL);
2417 }
2418 
2419 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
2420  bool IsSRA) const {
2421  SDLoc DL(Op);
2422  SDValue Lo = Op.getOperand(0);
2423  SDValue Hi = Op.getOperand(1);
2424  SDValue Shamt = Op.getOperand(2);
2425  EVT VT = Lo.getValueType();
2426 
2427  // SRA expansion:
2428  // if Shamt-XLEN < 0: // Shamt < XLEN
2429  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2430  // Hi = Hi >>s Shamt
2431  // else:
2432  // Lo = Hi >>s (Shamt-XLEN);
2433  // Hi = Hi >>s (XLEN-1)
2434  //
2435  // SRL expansion:
2436  // if Shamt-XLEN < 0: // Shamt < XLEN
2437  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
2438  // Hi = Hi >>u Shamt
2439  // else:
2440  // Lo = Hi >>u (Shamt-XLEN);
2441  // Hi = 0;
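 // Worked sketch with XLEN=32: an SRL by 36 takes the 'else' branch, giving
 // Lo = Hi >>u 4 and Hi = 0; an SRL by 4 takes the first branch, giving
 // Lo = (Lo >>u 4) | (Hi << 28) and Hi = Hi >>u 4.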
2442 
2443  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2444 
2445  SDValue Zero = DAG.getConstant(0, DL, VT);
2446  SDValue One = DAG.getConstant(1, DL, VT);
2447  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
2448  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
2449  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
2450  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
2451 
2452  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2453  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2454  SDValue ShiftLeftHi =
2455  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
2456  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2457  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2458  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
2459  SDValue HiFalse =
2460  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
2461 
2462  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
2463 
2464  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2465  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2466 
2467  SDValue Parts[2] = {Lo, Hi};
2468  return DAG.getMergeValues(Parts, DL);
2469 }
2470 
2471 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
2472 // illegal (currently only vXi64 RV32).
2473 // FIXME: We could also catch non-constant sign-extended i32 values and lower
2474 // them to SPLAT_VECTOR_I64
2475 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
2476  SelectionDAG &DAG) const {
2477  SDLoc DL(Op);
2478  EVT VecVT = Op.getValueType();
2479  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
2480  "Unexpected SPLAT_VECTOR_PARTS lowering");
2481 
2482  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
2483  SDValue Lo = Op.getOperand(0);
2484  SDValue Hi = Op.getOperand(1);
2485 
2486  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2487  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2488  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2489  // If Hi constant is all the same sign bit as Lo, lower this as a custom
2490  // node in order to try and match RVV vector/scalar instructions.
2491  if ((LoC >> 31) == HiC)
2492  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2493  }
2494 
2495  // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
2496  // to accidentally sign-extend the 32-bit halves to the e64 SEW:
2497  // vmv.v.x vX, hi
2498  // vsll.vx vX, vX, /*32*/
2499  // vmv.v.x vY, lo
2500  // vsll.vx vY, vY, /*32*/
2501  // vsrl.vx vY, vY, /*32*/
2502  // vor.vv vX, vX, vY
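 // For example (values are illustrative only), the pair Lo=0x89ABCDEF,
 // Hi=0x01234567 produces the element 0x0123456789ABCDEF in every lane; the
 // shift-left/shift-right pair strips the sign-extension of Lo before OR-ing
 // in the shifted Hi.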
2503  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
2504 
2505  Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2506  Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
2507  Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
2508 
2509  if (isNullConstant(Hi))
2510  return Lo;
2511 
2512  Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
2513  Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
2514 
2515  return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
2516 }
2517 
2518 // Custom-lower extensions from mask vectors by using a vselect either with 1
2519 // for zero/any-extension or -1 for sign-extension:
2520 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
2521 // Note that any-extension is lowered identically to zero-extension.
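 // For example, sign-extending the v4i1 mask (1,0,0,1) yields (-1,0,0,-1),
 // while zero/any-extending it yields (1,0,0,1).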
2522 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
2523  int64_t ExtTrueVal) const {
2524  SDLoc DL(Op);
2525  MVT VecVT = Op.getSimpleValueType();
2526  SDValue Src = Op.getOperand(0);
2527  // Only custom-lower extensions from mask types
2528  assert(Src.getValueType().isVector() &&
 2529  Src.getValueType().getVectorElementType() == MVT::i1);
 2530 
2531  MVT XLenVT = Subtarget.getXLenVT();
2532  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
2533  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
2534 
2535  if (VecVT.isScalableVector()) {
2536  // Be careful not to introduce illegal scalar types at this stage, and be
2537  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
2538  // illegal and must be expanded. Since we know that the constants are
2539  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
2540  bool IsRV32E64 =
2541  !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
2542 
2543  if (!IsRV32E64) {
2544  SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
2545  SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
2546  } else {
2547  SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
2548  SplatTrueVal =
2549  DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
2550  }
2551 
2552  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
2553  }
2554 
2555  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
2556  MVT I1ContainerVT =
 2557  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
 2558 
2559  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
2560 
2561  SDValue Mask, VL;
2562  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2563 
2564  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
2565  SplatTrueVal =
2566  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
2567  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
2568  SplatTrueVal, SplatZero, VL);
2569 
2570  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
2571 }
2572 
2573 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
2574  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
2575  MVT ExtVT = Op.getSimpleValueType();
2576  // Only custom-lower extensions from fixed-length vector types.
2577  if (!ExtVT.isFixedLengthVector())
2578  return Op;
2579  MVT VT = Op.getOperand(0).getSimpleValueType();
2580  // Grab the canonical container type for the extended type. Infer the smaller
2581  // type from that to ensure the same number of vector elements, as we know
2582  // the LMUL will be sufficient to hold the smaller type.
2583  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
2584  // Get the extended container type manually to ensure the same number of
2585  // vector elements between source and dest.
2586  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
2587  ContainerExtVT.getVectorElementCount());
2588 
2589  SDValue Op1 =
2590  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
2591 
2592  SDLoc DL(Op);
2593  SDValue Mask, VL;
2594  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2595 
2596  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
2597 
2598  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
2599 }
2600 
2601 // Custom-lower truncations from vectors to mask vectors by using a mask and a
2602 // setcc operation:
2603 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
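 // For example (illustrative values), truncating the v4i8 vector (2,3,0,1)
 // to v4i1 first masks it to (0,1,0,1) and then compares against zero,
 // giving the mask (0,1,0,1).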
2604 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
2605  SelectionDAG &DAG) const {
2606  SDLoc DL(Op);
2607  EVT MaskVT = Op.getValueType();
2608  // Only expect to custom-lower truncations to mask types
2609  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
2610  "Unexpected type for vector mask lowering");
2611  SDValue Src = Op.getOperand(0);
2612  MVT VecVT = Src.getSimpleValueType();
2613 
2614  // If this is a fixed vector, we need to convert it to a scalable vector.
2615  MVT ContainerVT = VecVT;
2616  if (VecVT.isFixedLengthVector()) {
2617  ContainerVT = getContainerForFixedLengthVector(VecVT);
2618  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2619  }
2620 
2621  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
2622  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2623 
2624  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
2625  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
2626 
2627  if (VecVT.isScalableVector()) {
2628  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
2629  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
2630  }
2631 
2632  SDValue Mask, VL;
2633  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2634 
2635  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2636  SDValue Trunc =
2637  DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
2638  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
2639  DAG.getCondCode(ISD::SETNE), Mask, VL);
2640  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
2641 }
2642 
2643 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
2644 // first position of a vector, and that vector is slid up to the insert index.
2645 // By limiting the active vector length to index+1 and merging with the
2646 // original vector (with an undisturbed tail policy for elements >= VL), we
2647 // achieve the desired result of leaving all elements untouched except the one
2648 // at VL-1, which is replaced with the desired value.
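 // As a sketch, inserting at index 2 of an 8-element vector becomes a
 // vslideup with OFFSET=2 executed with VL=3, so elements 0..1 and 3..7 keep
 // their original values.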
2649 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2650  SelectionDAG &DAG) const {
2651  SDLoc DL(Op);
2652  MVT VecVT = Op.getSimpleValueType();
2653  SDValue Vec = Op.getOperand(0);
2654  SDValue Val = Op.getOperand(1);
2655  SDValue Idx = Op.getOperand(2);
2656 
2657  MVT ContainerVT = VecVT;
2658  // If the operand is a fixed-length vector, convert to a scalable one.
2659  if (VecVT.isFixedLengthVector()) {
2660  ContainerVT = getContainerForFixedLengthVector(VecVT);
2661  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2662  }
2663 
2664  MVT XLenVT = Subtarget.getXLenVT();
2665 
2666  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
2667  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
2668  // Even i64-element vectors on RV32 can be lowered without scalar
2669  // legalization if the most-significant 32 bits of the value are not affected
2670  // by the sign-extension of the lower 32 bits.
2671  // TODO: We could also catch sign extensions of a 32-bit value.
2672  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
2673  const auto *CVal = cast<ConstantSDNode>(Val);
2674  if (isInt<32>(CVal->getSExtValue())) {
2675  IsLegalInsert = true;
2676  Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
2677  }
2678  }
2679 
2680  SDValue Mask, VL;
2681  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
2682 
2683  SDValue ValInVec;
2684 
2685  if (IsLegalInsert) {
 2686  unsigned Opc =
 2687  VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
2688  if (isNullConstant(Idx)) {
2689  Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
2690  if (!VecVT.isFixedLengthVector())
2691  return Vec;
2692  return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
2693  }
2694  ValInVec =
2695  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
2696  } else {
2697  // On RV32, i64-element vectors must be specially handled to place the
2698  // value at element 0, by using two vslide1up instructions in sequence on
2699  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
2700  // this.
2701  SDValue One = DAG.getConstant(1, DL, XLenVT);
2702  SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
2703  SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
2704  MVT I32ContainerVT =
2705  MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
2706  SDValue I32Mask =
2707  getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
2708  // Limit the active VL to two.
2709  SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
2710  // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
2711  // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
2712  ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
2713  InsertI64VL);
2714  // First slide in the hi value, then the lo in underneath it.
2715  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
2716  ValHi, I32Mask, InsertI64VL);
2717  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
2718  ValLo, I32Mask, InsertI64VL);
2719  // Bitcast back to the right container type.
2720  ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
2721  }
2722 
2723  // Now that the value is in a vector, slide it into position.
2724  SDValue InsertVL =
2725  DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
2726  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
2727  ValInVec, Idx, Mask, InsertVL);
2728  if (!VecVT.isFixedLengthVector())
2729  return Slideup;
2730  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
2731 }
2732 
2733 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
2734 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
2735 // types this is done using VMV_X_S to allow us to glean information about the
2736 // sign bits of the result.
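 // As a sketch, extracting element 3 becomes a vslidedown by 3 with VL=1
 // followed by vmv.x.s for integer types; floating-point extracts are left
 // to the existing patterns.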
2737 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2738  SelectionDAG &DAG) const {
2739  SDLoc DL(Op);
2740  SDValue Idx = Op.getOperand(1);
2741  SDValue Vec = Op.getOperand(0);
2742  EVT EltVT = Op.getValueType();
2743  MVT VecVT = Vec.getSimpleValueType();
2744  MVT XLenVT = Subtarget.getXLenVT();
2745 
2746  if (VecVT.getVectorElementType() == MVT::i1) {
2747  // FIXME: For now we just promote to an i8 vector and extract from that,
2748  // but this is probably not optimal.
 2749  MVT WideVT = VecVT.changeVectorElementType(MVT::i8);
 2750  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
2751  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
2752  }
2753 
2754  // If this is a fixed vector, we need to convert it to a scalable vector.
2755  MVT ContainerVT = VecVT;
2756  if (VecVT.isFixedLengthVector()) {
2757  ContainerVT = getContainerForFixedLengthVector(VecVT);
2758  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2759  }
2760 
2761  // If the index is 0, the vector is already in the right position.
2762  if (!isNullConstant(Idx)) {
2763  // Use a VL of 1 to avoid processing more elements than we need.
2764  SDValue VL = DAG.getConstant(1, DL, XLenVT);
2765  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2766  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2767  Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2768  DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
2769  }
2770 
2771  if (!EltVT.isInteger()) {
2772  // Floating-point extracts are handled in TableGen.
2773  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
2774  DAG.getConstant(0, DL, XLenVT));
2775  }
2776 
2777  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
2778  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
2779 }
2780 
2781 // Called by type legalization to handle splat of i64 on RV32.
2782 // FIXME: We can optimize this when the type has sign or zero bits in one
2783 // of the halves.
2784 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2785  SDValue VL, SelectionDAG &DAG) {
2786  SDValue ThirtyTwoV = DAG.getConstant(32, DL, VT);
 2787  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
 2788  DAG.getConstant(0, DL, MVT::i32));
 2789  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
 2790  DAG.getConstant(1, DL, MVT::i32));
2791 
2792  // vmv.v.x vX, hi
2793  // vsll.vx vX, vX, /*32*/
2794  // vmv.v.x vY, lo
2795  // vsll.vx vY, vY, /*32*/
2796  // vsrl.vx vY, vY, /*32*/
2797  // vor.vv vX, vX, vY
 2798  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
 2799  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2800  Lo = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2801  Lo = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2802  Lo = DAG.getNode(RISCVISD::SRL_VL, DL, VT, Lo, ThirtyTwoV, Mask, VL);
2803 
2804  Hi = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Hi, VL);
2805  Hi = DAG.getNode(RISCVISD::SHL_VL, DL, VT, Hi, ThirtyTwoV, Mask, VL);
2806 
2807  return DAG.getNode(RISCVISD::OR_VL, DL, VT, Lo, Hi, Mask, VL);
2808 }
2809 
2810 // Some RVV intrinsics may claim that they want an integer operand to be
2811 // promoted or expanded.
 2812 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
 2813  const RISCVSubtarget &Subtarget) {
2814  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2815  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2816  "Unexpected opcode");
2817 
2818  if (!Subtarget.hasStdExtV())
2819  return SDValue();
2820 
2821  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2822  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2823  SDLoc DL(Op);
2824 
 2825  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
 2826  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2827  if (!II || !II->SplatOperand)
2828  return SDValue();
2829 
2830  unsigned SplatOp = II->SplatOperand + HasChain;
2831  assert(SplatOp < Op.getNumOperands());
2832 
2833  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
2834  SDValue &ScalarOp = Operands[SplatOp];
2835  MVT OpVT = ScalarOp.getSimpleValueType();
2836  MVT XLenVT = Subtarget.getXLenVT();
2837 
2838  // If this isn't a scalar, or its type is XLenVT we're done.
2839  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
2840  return SDValue();
2841 
2842  // Simplest case is that the operand needs to be promoted to XLenVT.
2843  if (OpVT.bitsLT(XLenVT)) {
2844  // If the operand is a constant, sign extend to increase our chances
2845  // of being able to use a .vi instruction. ANY_EXTEND would become a
 2846  // zero extend and the simm5 check in isel would fail.
2847  // FIXME: Should we ignore the upper bits in isel instead?
2848  unsigned ExtOpc =
2849  isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2850  ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
2851  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2852  }
2853 
2854  // Use the previous operand to get the vXi64 VT. The result might be a mask
2855  // VT for compares. Using the previous operand assumes that the previous
2856  // operand will never have a smaller element size than a scalar operand and
2857  // that a widening operation never uses SEW=64.
2858  // NOTE: If this fails the below assert, we can probably just find the
2859  // element count from any operand or result and use it to construct the VT.
2860  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
2861  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
2862 
2863  // The more complex case is when the scalar is larger than XLenVT.
2864  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
2865  VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
2866 
2867  // If this is a sign-extended 32-bit constant, we can truncate it and rely
2868  // on the instruction to sign-extend since SEW>XLEN.
2869  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
2870  if (isInt<32>(CVal->getSExtValue())) {
2871  ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
2872  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2873  }
2874  }
2875 
2876  // We need to convert the scalar to a splat vector.
2877  // FIXME: Can we implicitly truncate the scalar if it is known to
2878  // be sign extended?
2879  // VL should be the last operand.
2880  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
2881  assert(VL.getValueType() == XLenVT);
2882  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
2883  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
2884 }
2885 
2886 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2887  SelectionDAG &DAG) const {
2888  unsigned IntNo = Op.getConstantOperandVal(0);
2889  SDLoc DL(Op);
2890  MVT XLenVT = Subtarget.getXLenVT();
2891 
2892  switch (IntNo) {
2893  default:
2894  break; // Don't custom lower most intrinsics.
2895  case Intrinsic::thread_pointer: {
2896  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2897  return DAG.getRegister(RISCV::X4, PtrVT);
2898  }
2899  case Intrinsic::riscv_orc_b:
2900  // Lower to the GORCI encoding for orc.b.
2901  return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
2902  DAG.getConstant(7, DL, XLenVT));
2903  case Intrinsic::riscv_vmv_x_s:
2904  assert(Op.getValueType() == XLenVT && "Unexpected VT!");
2905  return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
2906  Op.getOperand(1));
2907  case Intrinsic::riscv_vmv_v_x: {
2908  SDValue Scalar = Op.getOperand(1);
2909  if (Scalar.getValueType().bitsLE(XLenVT)) {
2910  unsigned ExtOpc =
2911  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2912  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2913  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(), Scalar,
2914  Op.getOperand(2));
2915  }
2916 
2917  assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2918 
2919  // If this is a sign-extended 32-bit constant, we can truncate it and rely
2920  // on the instruction to sign-extend since SEW>XLEN.
2921  if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) {
2922  if (isInt<32>(CVal->getSExtValue()))
2923  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, Op.getValueType(),
2924  DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32),
2925  Op.getOperand(2));
2926  }
2927 
2928  // Otherwise use the more complicated splatting algorithm.
2929  return splatSplitI64WithVL(DL, Op.getSimpleValueType(), Scalar,
2930  Op.getOperand(2), DAG);
2931  }
2932  case Intrinsic::riscv_vfmv_v_f:
2933  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
2934  Op.getOperand(1), Op.getOperand(2));
2935  case Intrinsic::riscv_vmv_s_x: {
2936  SDValue Scalar = Op.getOperand(2);
2937 
2938  if (Scalar.getValueType().bitsLE(XLenVT)) {
2939  Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
2940  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
2941  Op.getOperand(1), Scalar, Op.getOperand(3));
2942  }
2943 
2944  assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
2945 
2946  // This is an i64 value that lives in two scalar registers. We have to
 2947  // insert this in a convoluted way. First we build a vXi64 splat containing
 2948  // the two values that we assemble using some bit math. Next we'll use
2949  // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
2950  // to merge element 0 from our splat into the source vector.
2951  // FIXME: This is probably not the best way to do this, but it is
2952  // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
2953  // point.
2954  // vmv.v.x vX, hi
2955  // vsll.vx vX, vX, /*32*/
2956  // vmv.v.x vY, lo
2957  // vsll.vx vY, vY, /*32*/
2958  // vsrl.vx vY, vY, /*32*/
2959  // vor.vv vX, vX, vY
2960  //
2961  // vid.v vVid
2962  // vmseq.vx mMask, vVid, 0
2963  // vmerge.vvm vDest, vSrc, vVal, mMask
2964  MVT VT = Op.getSimpleValueType();
2965  SDValue Vec = Op.getOperand(1);
2966  SDValue VL = Op.getOperand(3);
2967 
2968  SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2969  SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
2970  DAG.getConstant(0, DL, MVT::i32), VL);
2971 
 2972  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
 2973  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2974  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
2975  SDValue SelectCond =
2976  DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
2977  DAG.getCondCode(ISD::SETEQ), Mask, VL);
2978  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
2979  Vec, VL);
2980  }
2981  case Intrinsic::riscv_vslide1up:
2982  case Intrinsic::riscv_vslide1down:
2983  case Intrinsic::riscv_vslide1up_mask:
2984  case Intrinsic::riscv_vslide1down_mask: {
2985  // We need to special case these when the scalar is larger than XLen.
2986  unsigned NumOps = Op.getNumOperands();
2987  bool IsMasked = NumOps == 6;
2988  unsigned OpOffset = IsMasked ? 1 : 0;
2989  SDValue Scalar = Op.getOperand(2 + OpOffset);
2990  if (Scalar.getValueType().bitsLE(XLenVT))
2991  break;
2992 
2993  // Splatting a sign extended constant is fine.
2994  if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
2995  if (isInt<32>(CVal->getSExtValue()))
2996  break;
2997 
2998  MVT VT = Op.getSimpleValueType();
 2999  assert(VT.getVectorElementType() == MVT::i64 &&
 3000  Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3001 
3002  // Convert the vector source to the equivalent nxvXi32 vector.
 3003  MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
 3004  SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3005 
3006  SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3007  DAG.getConstant(0, DL, XLenVT));
3008  SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3009  DAG.getConstant(1, DL, XLenVT));
3010 
3011  // Double the VL since we halved SEW.
3012  SDValue VL = Op.getOperand(NumOps - 1);
3013  SDValue I32VL =
3014  DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3015 
3016  MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3017  SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3018 
3019  // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3020  // instructions.
3021  if (IntNo == Intrinsic::riscv_vslide1up ||
3022  IntNo == Intrinsic::riscv_vslide1up_mask) {
3023  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3024  I32Mask, I32VL);
3025  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
3026  I32Mask, I32VL);
3027  } else {
3028  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
3029  I32Mask, I32VL);
3030  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
3031  I32Mask, I32VL);
3032  }
3033 
3034  // Convert back to nxvXi64.
3035  Vec = DAG.getBitcast(VT, Vec);
3036 
3037  if (!IsMasked)
3038  return Vec;
3039 
3040  // Apply mask after the operation.
3041  SDValue Mask = Op.getOperand(NumOps - 2);
3042  SDValue MaskedOff = Op.getOperand(1);
3043  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
3044  }
3045  }
3046 
3047  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3048 }
3049 
3050 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
3051  SelectionDAG &DAG) const {
3052  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3053 }
3054 
3055 static MVT getLMUL1VT(MVT VT) {
3056  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3057  "Unexpected vector MVT");
3058  return MVT::getScalableVectorVT(
3059  VT.getVectorElementType(),
 3060  RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
 3061 }
3062 
3063 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
3064  switch (ISDOpcode) {
3065  default:
3066  llvm_unreachable("Unhandled reduction");
 3067  case ISD::VECREDUCE_ADD:
 3068  return RISCVISD::VECREDUCE_ADD_VL;
 3069  case ISD::VECREDUCE_UMAX:
 3070  return RISCVISD::VECREDUCE_UMAX_VL;
 3071  case ISD::VECREDUCE_SMAX:
 3072  return RISCVISD::VECREDUCE_SMAX_VL;
 3073  case ISD::VECREDUCE_UMIN:
 3074  return RISCVISD::VECREDUCE_UMIN_VL;
 3075  case ISD::VECREDUCE_SMIN:
 3076  return RISCVISD::VECREDUCE_SMIN_VL;
 3077  case ISD::VECREDUCE_AND:
 3078  return RISCVISD::VECREDUCE_AND_VL;
 3079  case ISD::VECREDUCE_OR:
 3080  return RISCVISD::VECREDUCE_OR_VL;
 3081  case ISD::VECREDUCE_XOR:
 3082  return RISCVISD::VECREDUCE_XOR_VL;
3083  }
3084 }
3085 
3086 SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
3087  SelectionDAG &DAG) const {
3088  SDLoc DL(Op);
3089  SDValue Vec = Op.getOperand(0);
3090  MVT VecVT = Vec.getSimpleValueType();
3091  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
3092  Op.getOpcode() == ISD::VECREDUCE_OR ||
3093  Op.getOpcode() == ISD::VECREDUCE_XOR) &&
3094  "Unexpected reduction lowering");
3095 
3096  MVT XLenVT = Subtarget.getXLenVT();
3097  assert(Op.getValueType() == XLenVT &&
3098  "Expected reduction output to be legalized to XLenVT");
3099 
3100  MVT ContainerVT = VecVT;
3101  if (VecVT.isFixedLengthVector()) {
3102  ContainerVT = getContainerForFixedLengthVector(VecVT);
3103  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3104  }
3105 
3106  SDValue Mask, VL;
3107  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3108  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3109 
3110  switch (Op.getOpcode()) {
3111  default:
3112  llvm_unreachable("Unhandled reduction");
3113  case ISD::VECREDUCE_AND:
3114  // vpopc ~x == 0
3115  Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
3116  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3117  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
3118  case ISD::VECREDUCE_OR:
3119  // vpopc x != 0
3120  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3121  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3122  case ISD::VECREDUCE_XOR: {
3123  // ((vpopc x) & 1) != 0
3124  SDValue One = DAG.getConstant(1, DL, XLenVT);
3125  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3126  Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
3127  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3128  }
3129  }
3130 }
3131 
3132 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
3133  SelectionDAG &DAG) const {
3134  SDLoc DL(Op);
3135  SDValue Vec = Op.getOperand(0);
3136  EVT VecEVT = Vec.getValueType();
3137 
3138  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
3139 
3140  // Due to ordering in legalize types we may have a vector type that needs to
3141  // be split. Do that manually so we can get down to a legal type.
3142  while (getTypeAction(*DAG.getContext(), VecEVT) ==
 3143  TargetLowering::TypeSplitVector) {
 3144  SDValue Lo, Hi;
3145  std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
3146  VecEVT = Lo.getValueType();
3147  Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
3148  }
3149 
3150  // TODO: The type may need to be widened rather than split. Or widened before
3151  // it can be split.
3152  if (!isTypeLegal(VecEVT))
3153  return SDValue();
3154 
3155  MVT VecVT = VecEVT.getSimpleVT();
3156  MVT VecEltVT = VecVT.getVectorElementType();
3157  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
3158 
3159  MVT ContainerVT = VecVT;
3160  if (VecVT.isFixedLengthVector()) {
3161  ContainerVT = getContainerForFixedLengthVector(VecVT);
3162  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3163  }
3164 
3165  MVT M1VT = getLMUL1VT(ContainerVT);
3166 
3167  SDValue Mask, VL;
3168  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3169 
3170  // FIXME: This is a VLMAX splat which might be too large and can prevent
3171  // vsetvli removal.
3172  SDValue NeutralElem =
3173  DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
3174  SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
3175  SDValue Reduction =
3176  DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
3177  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3178  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3179  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
3180 }
3181 
3182 // Given a reduction op, this function returns the matching reduction opcode,
3183 // the vector SDValue and the scalar SDValue required to lower this to a
3184 // RISCVISD node.
3185 static std::tuple<unsigned, SDValue, SDValue>
3187  SDLoc DL(Op);
3188  switch (Op.getOpcode()) {
3189  default:
3190  llvm_unreachable("Unhandled reduction");
3191  case ISD::VECREDUCE_FADD:
3192  return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
3193  DAG.getConstantFP(0.0, DL, EltVT));
 3194  case ISD::VECREDUCE_SEQ_FADD:
 3195  return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
3196  Op.getOperand(0));
3197  }
3198 }
3199 
3200 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
3201  SelectionDAG &DAG) const {
3202  SDLoc DL(Op);
3203  MVT VecEltVT = Op.getSimpleValueType();
3204 
3205  unsigned RVVOpcode;
3206  SDValue VectorVal, ScalarVal;
3207  std::tie(RVVOpcode, VectorVal, ScalarVal) =
3208  getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
3209  MVT VecVT = VectorVal.getSimpleValueType();
3210 
3211  MVT ContainerVT = VecVT;
3212  if (VecVT.isFixedLengthVector()) {
3213  ContainerVT = getContainerForFixedLengthVector(VecVT);
3214  VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
3215  }
3216 
3217  MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
3218 
3219  SDValue Mask, VL;
3220  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3221 
3222  // FIXME: This is a VLMAX splat which might be too large and can prevent
3223  // vsetvli removal.
3224  SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
3225  SDValue Reduction =
3226  DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
3227  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
3228  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3229 }
3230 
3231 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
3232  SelectionDAG &DAG) const {
3233  SDValue Vec = Op.getOperand(0);
3234  SDValue SubVec = Op.getOperand(1);
3235  MVT VecVT = Vec.getSimpleValueType();
3236  MVT SubVecVT = SubVec.getSimpleValueType();
3237 
3238  SDLoc DL(Op);
3239  MVT XLenVT = Subtarget.getXLenVT();
3240  unsigned OrigIdx = Op.getConstantOperandVal(2);
3241  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3242 
3243  // We don't have the ability to slide mask vectors up indexed by their i1
3244  // elements; the smallest we can do is i8. Often we are able to bitcast to
3245  // equivalent i8 vectors. Note that when inserting a fixed-length vector
3246  // into a scalable one, we might not necessarily have enough scalable
3247  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
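 // For example (a sketch of the fast path below), inserting nxv8i1 into
 // nxv64i1 at index 16 can be done as an nxv1i8-into-nxv8i8 insert at index
 // 2 after bitcasting both operands.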
3248  if (SubVecVT.getVectorElementType() == MVT::i1 &&
3249  (OrigIdx != 0 || !Vec.isUndef())) {
3250  if (VecVT.getVectorMinNumElements() >= 8 &&
3251  SubVecVT.getVectorMinNumElements() >= 8) {
3252  assert(OrigIdx % 8 == 0 && "Invalid index");
3253  assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
3254  SubVecVT.getVectorMinNumElements() % 8 == 0 &&
3255  "Unexpected mask vector lowering");
3256  OrigIdx /= 8;
3257  SubVecVT =
 3258  MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
 3259  SubVecVT.isScalableVector());
3260  VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
3261  VecVT.isScalableVector());
3262  Vec = DAG.getBitcast(VecVT, Vec);
3263  SubVec = DAG.getBitcast(SubVecVT, SubVec);
3264  } else {
3265  // We can't slide this mask vector up indexed by its i1 elements.
3266  // This poses a problem when we wish to insert a scalable vector which
3267  // can't be re-expressed as a larger type. Just choose the slow path and
3268  // extend to a larger type, then truncate back down.
3269  MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
3270  MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
3271  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
3272  SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
3273  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
3274  Op.getOperand(2));
3275  SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
3276  return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
3277  }
3278  }
3279 
3280  // If the subvector vector is a fixed-length type, we cannot use subregister
3281  // manipulation to simplify the codegen; we don't know which register of a
3282  // LMUL group contains the specific subvector as we only know the minimum
3283  // register size. Therefore we must slide the vector group up the full
3284  // amount.
3285  if (SubVecVT.isFixedLengthVector()) {
3286  if (OrigIdx == 0 && Vec.isUndef())
3287  return Op;
3288  MVT ContainerVT = VecVT;
3289  if (VecVT.isFixedLengthVector()) {
3290  ContainerVT = getContainerForFixedLengthVector(VecVT);
3291  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3292  }
3293  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3294  DAG.getUNDEF(ContainerVT), SubVec,
3295  DAG.getConstant(0, DL, XLenVT));
3296  SDValue Mask =
3297  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
3298  // Set the vector length to only the number of elements we care about. Note
3299  // that for slideup this includes the offset.
3300  SDValue VL =
3301  DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
3302  SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
3303  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3304  SubVec, SlideupAmt, Mask, VL);
3305  if (VecVT.isFixedLengthVector())
3306  Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3307  return DAG.getBitcast(Op.getValueType(), Slideup);
3308  }
3309 
3310  unsigned SubRegIdx, RemIdx;
3311  std::tie(SubRegIdx, RemIdx) =
 3312  RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
 3313  VecVT, SubVecVT, OrigIdx, TRI);
3314 
3315  RISCVVLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
3316  bool IsSubVecPartReg = SubVecLMUL == RISCVVLMUL::LMUL_F2 ||
3317  SubVecLMUL == RISCVVLMUL::LMUL_F4 ||
3318  SubVecLMUL == RISCVVLMUL::LMUL_F8;
3319 
3320  // 1. If the Idx has been completely eliminated and this subvector's size is
3321  // a vector register or a multiple thereof, or the surrounding elements are
3322  // undef, then this is a subvector insert which naturally aligns to a vector
3323  // register. These can easily be handled using subregister manipulation.
3324  // 2. If the subvector is smaller than a vector register, then the insertion
3325  // must preserve the undisturbed elements of the register. We do this by
3326  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
3327  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
3328  // subvector within the vector register, and an INSERT_SUBVECTOR of that
3329  // LMUL=1 type back into the larger vector (resolving to another subregister
3330  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
3331  // to avoid allocating a large register group to hold our subvector.
3332  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
3333  return Op;
3334 
3335  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
3336  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
3337  // (in our case undisturbed). This means we can set up a subvector insertion
3338  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
3339  // size of the subvector.
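// For illustration (a hedged sketch, not taken from the code below): inserting
// nxv1i32 into nxv4i32 at index 3 decomposes to RemIdx = 1, so AlignedIdx = 2
// and the LMUL=1 register type (nxv2i32) holding the insertion point is
// extracted. The subvector is first placed at element 0 of an undef nxv2i32,
// slid up by OFFSET = 1 * vscale with VL = 2 * vscale so the elements below
// the offset stay undisturbed, and the LMUL=1 register is then inserted back
// at AlignedIdx.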
3340  MVT InterSubVT = VecVT;
3341  SDValue AlignedExtract = Vec;
3342  unsigned AlignedIdx = OrigIdx - RemIdx;
3343  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
3344  InterSubVT = getLMUL1VT(VecVT);
3345  // Extract a subvector equal to the nearest full vector register type. This
3346  // should resolve to an EXTRACT_SUBREG instruction.
3347  AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
3348  DAG.getConstant(AlignedIdx, DL, XLenVT));
3349  }
3350 
3351  SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
3352  // For scalable vectors this must be further multiplied by vscale.
3353  SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
3354 
3355  SDValue Mask, VL;
3356  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
3357 
3358  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
3359  VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
3360  VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
3361  VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
3362 
3363  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
3364  DAG.getUNDEF(InterSubVT), SubVec,
3365  DAG.getConstant(0, DL, XLenVT));
3366 
3367  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
3368  AlignedExtract, SubVec, SlideupAmt, Mask, VL);
3369 
3370  // If required, insert this subvector back into the correct vector register.
3371  // This should resolve to an INSERT_SUBREG instruction.
3372  if (VecVT.bitsGT(InterSubVT))
3373  Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
3374  DAG.getConstant(AlignedIdx, DL, XLenVT));
3375 
3376  // We might have bitcast from a mask type: cast back to the original type if
3377  // required.
3378  return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
3379 }
3380 
3381 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
3382  SelectionDAG &DAG) const {
3383  SDValue Vec = Op.getOperand(0);
3384  MVT SubVecVT = Op.getSimpleValueType();
3385  MVT VecVT = Vec.getSimpleValueType();
3386 
3387  SDLoc DL(Op);
3388  MVT XLenVT = Subtarget.getXLenVT();
3389  unsigned OrigIdx = Op.getConstantOperandVal(1);
3390  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3391 
3392  // We don't have the ability to slide mask vectors down indexed by their i1
3393  // elements; the smallest we can do is i8. Often we are able to bitcast to
3394  // equivalent i8 vectors. Note that when extracting a fixed-length vector
3395  // from a scalable one, we might not necessarily have enough scalable
3396  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
3397  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
3398  if (VecVT.getVectorMinNumElements() >= 8 &&
3399  SubVecVT.getVectorMinNumElements() >= 8) {
3400  assert(OrigIdx % 8 == 0 && "Invalid index");
3401  assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
3402  SubVecVT.getVectorMinNumElements() % 8 == 0 &&
3403  "Unexpected mask vector lowering");
3404  OrigIdx /= 8;
3405  SubVecVT =
3406  MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
3407  SubVecVT.isScalableVector());
3408  VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
3409  VecVT.isScalableVector());
3410  Vec = DAG.getBitcast(VecVT, Vec);
3411  } else {
3412  // We can't slide this mask vector down, indexed by its i1 elements.
3413  // This poses a problem when we wish to extract a scalable vector which
3414  // can't be re-expressed as a larger type. Just choose the slow path and
3415  // extend to a larger type, then truncate back down.
3416  // TODO: We could probably improve this when extracting certain fixed-length
3417  // vectors from fixed-length vectors, where we can extract as i8 and shift the
3418  // correct element right to reach the desired subvector.
3419  MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
3420  MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
3421  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
3422  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
3423  Op.getOperand(1));
3424  SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
3425  return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
3426  }
3427  }
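// For illustration (a hedged example): extracting v8i1 from nxv64i1 at index
// 16 takes the bitcast path above and becomes an extract of v1i8 from nxv8i8
// at index 2 (16 / 8), since eight mask bits pack into one byte. The slower
// extend-to-i8/setcc path is only needed when either type has fewer than
// eight (minimum) elements.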
3428 
3429  // If the subvector is a fixed-length type, we cannot use subregister
3430  // manipulation to simplify the codegen; we don't know which register of an
3431  // LMUL group contains the specific subvector, as we only know the minimum
3432  // register size. Therefore we must slide the vector group down the full
3433  // amount.
3434  if (SubVecVT.isFixedLengthVector()) {
3435  // With an index of 0 this is a cast-like subvector, which can be performed
3436  // with subregister operations.
3437  if (OrigIdx == 0)
3438  return Op;
3439  MVT ContainerVT = VecVT;
3440  if (VecVT.isFixedLengthVector()) {
3441  ContainerVT = getContainerForFixedLengthVector(VecVT);
3442  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3443  }
3444  SDValue Mask =
3445  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
3446  // Set the vector length to only the number of elements we care about. This
3447  // avoids sliding down elements we're going to discard straight away.
3448  SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
3449  SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
3450  SDValue Slidedown =
3451  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3452  DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
3453  // Now we can use a cast-like subvector extract to get the result.
3454  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3455  DAG.getConstant(0, DL, XLenVT));
3456  return DAG.getBitcast(Op.getValueType(), Slidedown);
3457  }
3458 
3459  unsigned SubRegIdx, RemIdx;
3460  std::tie(SubRegIdx, RemIdx) =
3461  RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3462  VecVT, SubVecVT, OrigIdx, TRI);
3463 
3464  // If the Idx has been completely eliminated then this is a subvector extract
3465  // which naturally aligns to a vector register. These can easily be handled
3466  // using subregister manipulation.
3467  if (RemIdx == 0)
3468  return Op;
3469 
3470  // Else we must shift our vector register directly to extract the subvector.
3471  // Do this using VSLIDEDOWN.
3472 
3473  // If the vector type is an LMUL-group type, extract a subvector equal to the
3474  // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
3475  // instruction.
3476  MVT InterSubVT = VecVT;
3477  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
3478  InterSubVT = getLMUL1VT(VecVT);
3479  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
3480  DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
3481  }
3482 
3483  // Slide this vector register down by the desired number of elements in order
3484  // to place the desired subvector starting at element 0.
3485  SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
3486  // For scalable vectors this must be further multiplied by vscale.
3487  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
3488 
3489  SDValue Mask, VL;
3490  std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
3491  SDValue Slidedown =
3492  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
3493  DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
3494 
3495  // Now the vector is in the right position, extract our final subvector. This
3496  // should resolve to a COPY.
3497  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
3498  DAG.getConstant(0, DL, XLenVT));
3499 
3500  // We might have bitcast from a mask type: cast back to the original type if
3501  // required.
3502  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
3503 }
3504 
3505 // Lower step_vector to the vid instruction.
3506 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
3507  SelectionDAG &DAG) const {
3508  SDLoc DL(Op);
3509  assert(Op.getConstantOperandAPInt(0) == 1 && "Unexpected step value");
3510  MVT VT = Op.getSimpleValueType();
3511  SDValue Mask, VL;
3512  std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
3513  return DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3514 }
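// For illustration (a hedged sketch): a unit-step step_vector of, say, nxv4i32
// becomes a single VID_VL under VLMAX, which typically selects to vid.v and
// writes 0, 1, 2, ... into successive lanes.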
3515 
3516 // Implement vector_reverse using vrgather.vv with indices determined by
3517 // subtracting the id of each element from (VLMAX-1). This will convert
3518 // the indices like so:
3519 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
3520 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
3521 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
3522  SelectionDAG &DAG) const {
3523  SDLoc DL(Op);
3524  MVT VecVT = Op.getSimpleValueType();
3525  unsigned EltSize = VecVT.getScalarSizeInBits();
3526  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3527 
3528  unsigned MaxVLMAX = 0;
3529  unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
3530  if (VectorBitsMax != 0)
3531  MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
3532 
3533  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
3534  MVT IntVT = VecVT.changeVectorElementTypeToInteger();
3535 
3536  // If this is SEW=8 and VLMAX is unknown or more than 256, we need
3537  // to use vrgatherei16.vv.
3538  // TODO: It's also possible to use vrgatherei16.vv for other types to
3539  // decrease register width for the index calculation.
3540  if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
3541  // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
3542  // Reverse each half, then reassemble them in reverse order.
3543  // NOTE: It's also possible that, after splitting, VLMAX no longer
3544  // requires vrgatherei16.vv.
3545  if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
3546  SDValue Lo, Hi;
3547  std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
3548  EVT LoVT, HiVT;
3549  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
3550  Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
3551  Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
3552  // Reassemble the low and high pieces reversed.
3553  // FIXME: This is a CONCAT_VECTORS.
3554  SDValue Res =
3555  DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
3556  DAG.getIntPtrConstant(0, DL));
3557  return DAG.getNode(
3558  ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
3559  DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
3560  }
3561 
3562  // Just promote the int type to i16 which will double the LMUL.
3563  IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
3564  GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
3565  }
3566 
3567  MVT XLenVT = Subtarget.getXLenVT();
3568  SDValue Mask, VL;
3569  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
3570 
3571  // Calculate VLMAX-1 for the desired SEW.
3572  unsigned MinElts = VecVT.getVectorMinNumElements();
3573  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
3574  DAG.getConstant(MinElts, DL, XLenVT));
3575  SDValue VLMinus1 =
3576  DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
3577 
3578  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
3579  bool IsRV32E64 =
3580  !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
3581  SDValue SplatVL;
3582  if (!IsRV32E64)
3583  SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
3584  else
3585  SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
3586 
3587  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
3588  SDValue Indices =
3589  DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
3590 
3591  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
3592 }
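// For illustration (a hedged worked example): with VLMAX = 8, VID_VL produces
// (0, 1, ..., 7) and subtracting it from the splat of VLMAX-1 = 7 gives the
// indices (7, 6, ..., 0), so the gather reads lane 7-i into lane i and the
// vector is reversed. The vrgatherei16 form is required for SEW=8 whenever
// VLMAX may exceed 256, because an i8 index can only address 256 elements.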
3593 
3594 SDValue
3595 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
3596  SelectionDAG &DAG) const {
3597  auto *Load = cast<LoadSDNode>(Op);
3598 
3599  SDLoc DL(Op);
3600  MVT VT = Op.getSimpleValueType();
3601  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3602 
3603  SDValue VL =
3604  DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3605 
3606  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3607  SDValue NewLoad = DAG.getMemIntrinsicNode(
3608  RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
3609  Load->getMemoryVT(), Load->getMemOperand());
3610 
3611  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
3612  return DAG.getMergeValues({Result, Load->getChain()}, DL);
3613 }
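// For illustration (a hedged sketch): a fixed-length v4i32 load is widened to
// its scalable container type and emitted as a VLE_VL whose VL operand is the
// constant 4, so it typically selects to a vle32.v executed under a vector
// length of four elements; the remaining lanes of the container are ignored.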
3614 
3615 SDValue
3616 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
3617  SelectionDAG &DAG) const {
3618  auto *Store = cast<StoreSDNode>(Op);
3619 
3620  SDLoc DL(Op);
3621  MVT VT = Store->getValue().getSimpleValueType();
3622 
3623  // FIXME: We probably need to zero any extra bits in a byte for mask stores.
3624  // This is tricky to do.
3625 
3626  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3627 
3628  SDValue VL =
3629  DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3630 
3631  SDValue NewValue =
3632  convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
3633  return DAG.getMemIntrinsicNode(
3634  RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
3635  {Store->getChain(), NewValue, Store->getBasePtr(), VL},
3636  Store->getMemoryVT(), Store->getMemOperand());
3637 }
3638 
3639 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
3640  auto *Load = cast<MaskedLoadSDNode>(Op);
3641 
3642  SDLoc DL(Op);
3643  MVT VT = Op.getSimpleValueType();
3644  MVT XLenVT = Subtarget.getXLenVT();
3645 
3646  SDValue Mask = Load->getMask();
3647  SDValue PassThru = Load->getPassThru();
3648  SDValue VL;
3649 
3650  MVT ContainerVT = VT;
3651  if (VT.isFixedLengthVector()) {
3652  ContainerVT = getContainerForFixedLengthVector(VT);
3653  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3654 
3655  Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3656  PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
3657  VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3658  } else
3659  VL = DAG.getRegister(RISCV::X0, XLenVT);
3660 
3661  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3662  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
3663  SDValue Ops[] = {Load->getChain(), IntID, PassThru,
3664  Load->getBasePtr(), Mask, VL};
3665  SDValue Result =
3666  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
3667  Load->getMemoryVT(), Load->getMemOperand());
3668  SDValue Chain = Result.getValue(1);
3669 
3670  if (VT.isFixedLengthVector())
3671  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3672 
3673  return DAG.getMergeValues({Result, Chain}, DL);
3674 }
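// Note (illustrative): in the scalable case above, passing X0 as the VL
// operand requests VLMAX, i.e. the whole register group is loaded under the
// mask, whereas for fixed-length vectors VL is the exact element count of the
// original type.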
3675 
3676 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
3677  auto *Store = cast<MaskedStoreSDNode>(Op);
3678 
3679  SDLoc DL(Op);
3680  SDValue Val = Store->getValue();
3681  SDValue Mask = Store->getMask();
3682  MVT VT = Val.getSimpleValueType();
3683  MVT XLenVT = Subtarget.getXLenVT();
3684  SDValue VL;
3685 
3686  MVT ContainerVT = VT;
3687  if (VT.isFixedLengthVector()) {
3688  ContainerVT = getContainerForFixedLengthVector(VT);
3689  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3690 
3691  Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
3692  Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3693  VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3694  } else
3695  VL = DAG.getRegister(RISCV::X0, XLenVT);
3696 
3697  SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
3698  return DAG.getMemIntrinsicNode(
3699  ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
3700  {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
3701  Store->getMemoryVT(), Store->getMemOperand());
3702 }
3703 
3704 SDValue
3705 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
3706  SelectionDAG &DAG) const {
3707  MVT InVT = Op.getOperand(0).getSimpleValueType();
3708  MVT ContainerVT = getContainerForFixedLengthVector(InVT);
3709 
3710  MVT VT = Op.getSimpleValueType();
3711 
3712  SDValue Op1 =
3713  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3714  SDValue Op2 =
3715  convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3716 
3717  SDLoc DL(Op);
3718  SDValue VL =
3719  DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
3720 
3721  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3722  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3723 
3724  SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
3725  Op.getOperand(2), Mask, VL);
3726 
3727  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
3728 }
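// For illustration (a hedged example): a fixed v4i32 SETCC with SETEQ becomes
// a SETCC_VL on the container type under an all-ones VMSET_VL mask with
// VL = 4, which typically selects to vmseq.vv producing the i1 mask result.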
3729 
3730 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
3731  SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
3732  MVT VT = Op.getSimpleValueType();
3733 
3734  if (VT.getVectorElementType() == MVT::i1)
3735  return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
3736 
3737  return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
3738 }
3739 
3740 // Lower vector ABS to smax(X, sub(0, X)).
3741 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
3742  SDLoc DL(Op);
3743  MVT VT = Op.getSimpleValueType();
3744  SDValue X = Op.getOperand(0);
3745 
3746  assert(VT.isFixedLengthVector() && "Unexpected type");
3747 
3748  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3749  X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
3750 
3751  SDValue Mask, VL;
3752  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3753 
3754  SDValue SplatZero =
3755  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
3756  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
3757  SDValue NegX =
3758  DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
3759  SDValue Max =
3760  DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
3761 
3762  return convertFromScalableVector(VT, Max, DAG, Subtarget);
3763 }
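// A quick check of the identity used above, abs(x) == smax(x, 0 - x):
// for x = -5, smax(-5, 5) = 5; for x = 7, smax(7, -7) = 7; and INT_MIN maps
// to itself under two's-complement wrapping, matching ISD::ABS semantics.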
3764 
3765 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
3766  SDValue Op, SelectionDAG &DAG) const {
3767  SDLoc DL(Op);
3768  MVT VT = Op.getSimpleValueType();
3769  SDValue Mag = Op.getOperand(0);
3770  SDValue Sign = Op.getOperand(1);
3771  assert(Mag.getValueType() == Sign.getValueType() &&
3772  "Can only handle COPYSIGN with matching types.");
3773 
3774  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3775  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
3776  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
3777 
3778  SDValue Mask, VL;
3779  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3780 
3781  SDValue CopySign =
3782  DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
3783 
3784  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
3785 }
3786 
3787 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
3788  SDValue Op, SelectionDAG &DAG) const {
3789  MVT VT = Op.getSimpleValueType();
3790  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3791 
3792  MVT I1ContainerVT =
3793  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3794 
3795  SDValue CC =
3796  convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
3797  SDValue Op1 =
3798  convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
3799  SDValue Op2 =
3800  convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
3801 
3802  SDLoc DL(Op);
3803  SDValue Mask, VL;
3804  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3805 
3806  SDValue Select =
3807  DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
3808 
3809  return convertFromScalableVector(VT, Select, DAG, Subtarget);
3810 }
3811 
3812 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
3813  unsigned NewOpc,
3814  bool HasMask) const {
3815  MVT VT = Op.getSimpleValueType();
3816  assert(useRVVForFixedLengthVectorVT(VT) &&
3817  "Only expected to lower fixed length vector operation!");
3818  MVT ContainerVT = getContainerForFixedLengthVector(VT);
3819 
3820  // Create list of operands by converting existing ones to scalable types.
3821  SmallVector<SDValue, 6> Ops;
3822  for (const SDValue &V : Op->op_values()) {
3823  assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
3824 
3825  // Pass through non-vector operands.
3826  if (!V.getValueType().isVector()) {
3827  Ops.push_back(V);
3828  continue;
3829  }
3830 
3831  // "cast" fixed length vector to a scalable vector.
3832  assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
3833  "Only fixed length vectors are supported!");
3834  Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
3835  }
3836 
3837  SDLoc DL(Op);
3838  SDValue Mask, VL;
3839  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3840  if (HasMask)
3841  Ops.push_back(Mask);
3842  Ops.push_back(VL);
3843 
3844  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
3845  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
3846 }
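// For illustration (a hedged example): a fixed-length v8i16 ISD::ADD is
// rewritten here as a VL-predicated add on the scalable container type, with
// an all-ones mask and VL = 8 appended as trailing operands, and the result
// is converted back to v8i16.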
3847 
3848 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to
3849 // an RVV indexed load. The RVV indexed load instructions only support the
3850 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3851 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3852 // indexing is extended to the XLEN value type and scaled accordingly.
3853 SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
3854  auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
3855  SDLoc DL(Op);
3856 
3857  SDValue Index = MGN->getIndex();
3858  SDValue Mask = MGN->getMask();
3859  SDValue PassThru = MGN->getPassThru();
3860 
3861  MVT VT = Op.getSimpleValueType();
3862  MVT IndexVT = Index.getSimpleValueType();
3863  MVT XLenVT = Subtarget.getXLenVT();
3864 
3865  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3866  "Unexpected VTs!");
3867  assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
3868  "Unexpected pointer type");
3869  // Targets have to explicitly opt-in for extending vector loads.
3870  assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
3871  "Unexpected extending MGATHER");
3872 
3873  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3874  // the selection of the masked intrinsics doesn't do this for us.
3875  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3876 
3877  SDValue VL;
3878  MVT ContainerVT = VT;
3879  if (VT.isFixedLengthVector()) {
3880  // We need to use the larger of the result and index type to determine the
3881  // scalable type to use so we don't increase LMUL for any operand/result.
3882  if (VT.bitsGE(IndexVT)) {
3883  ContainerVT = getContainerForFixedLengthVector(VT);
3884  IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3885  ContainerVT.getVectorElementCount());
3886  } else {
3887  IndexVT = getContainerForFixedLengthVector(IndexVT);
3888  ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
3889  IndexVT.getVectorElementCount());
3890  }
3891 
3892  Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3893 
3894  if (!IsUnmasked) {
3895  MVT MaskVT =
3896  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3897  Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3898  PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
3899  }
3900 
3901  VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
3902  } else
3903  VL = DAG.getRegister(RISCV::X0, XLenVT);
3904 
3905  unsigned IntID =
3906  IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
3907  SmallVector<SDValue, 8> Ops{MGN->getChain(),
3908  DAG.getTargetConstant(IntID, DL, XLenVT)};
3909  if (!IsUnmasked)
3910  Ops.push_back(PassThru);
3911  Ops.push_back(MGN->getBasePtr());
3912  Ops.push_back(Index);
3913  if (!IsUnmasked)
3914  Ops.push_back(Mask);
3915  Ops.push_back(VL);
3916 
3917  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
3918  SDValue Result =
3919  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
3920  MGN->getMemoryVT(), MGN->getMemOperand());
3921  SDValue Chain = Result.getValue(1);
3922 
3923  if (VT.isFixedLengthVector())
3924  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3925 
3926  return DAG.getMergeValues({Result, Chain}, DL);
3927 }
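// For illustration (a hedged example of the addressing mode described above):
// a gather of i32 elements at element indices {0, 2, 5, 7} must present byte
// offsets {0, 8, 20, 28} to the indexed load, since vloxei interprets each
// index purely as a byte offset from the base pointer.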
3928 
3929 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
3930 // an RVV indexed store. The RVV indexed store instructions only support the
3931 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
3932 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
3933 // indexing is extended to the XLEN value type and scaled accordingly.
3934 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
3935  SelectionDAG &DAG) const {
3936  auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
3937  SDLoc DL(Op);
3938  SDValue Index = MSN->getIndex();
3939  SDValue Mask = MSN->getMask();
3940  SDValue Val = MSN->getValue();
3941 
3942  MVT VT = Val.getSimpleValueType();
3943  MVT IndexVT = Index.getSimpleValueType();
3944  MVT XLenVT = Subtarget.getXLenVT();
3945 
3946  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
3947  "Unexpected VTs!");
3948  assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
3949  "Unexpected pointer type");
3950  // Targets have to explicitly opt-in for extending vector loads and
3951  // truncating vector stores.
3952  assert(!MSN->isTruncatingStore() && "Unexpected truncating MSCATTER");
3953 
3954  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
3955  // the selection of the masked intrinsics doesn't do this for us.
3956  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
3957 
3958  SDValue VL;
3959  if (VT.isFixedLengthVector()) {
3960  // We need to use the larger of the value and index type to determine the
3961  // scalable type to use so we don't increase LMUL for any operand/result.
3962  if (VT.bitsGE(IndexVT)) {
3963  VT = getContainerForFixedLengthVector(VT);
3964  IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
3965  VT.getVectorElementCount());
3966  } else {
3967  IndexVT = getContainerForFixedLengthVector(IndexVT);
3968  VT = MVT::getVectorVT(VT.getVectorElementType(),
3969  IndexVT.getVectorElementCount());
3970  }
3971 
3972  Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
3973  Val = convertToScalableVector(VT, Val, DAG, Subtarget);
3974 
3975  if (!IsUnmasked) {
3976  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3977  Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
3978  }