1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
31 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/KnownBits.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49  const RISCVSubtarget &STI)
50  : TargetLowering(TM), Subtarget(STI) {
51 
52  if (Subtarget.isRV32E())
53  report_fatal_error("Codegen not yet implemented for RV32E");
54 
55  RISCVABI::ABI ABI = Subtarget.getTargetABI();
56  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59  !Subtarget.hasStdExtF()) {
60  errs() << "Hard-float 'f' ABI can't be used for a target that "
61  "doesn't support the F instruction set extension (ignoring "
62  "target-abi)\n";
64  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65  !Subtarget.hasStdExtD()) {
66  errs() << "Hard-float 'd' ABI can't be used for a target that "
67  "doesn't support the D instruction set extension (ignoring "
68  "target-abi)\n";
70  }
71 
72  switch (ABI) {
73  default:
74  report_fatal_error("Don't know how to lower this ABI");
75  case RISCVABI::ABI_ILP32:
76  case RISCVABI::ABI_ILP32F:
77  case RISCVABI::ABI_ILP32D:
78  case RISCVABI::ABI_LP64:
79  case RISCVABI::ABI_LP64F:
80  case RISCVABI::ABI_LP64D:
81  break;
82  }
83 
84  MVT XLenVT = Subtarget.getXLenVT();
85 
86  // Set up the register classes.
87  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89  if (Subtarget.hasStdExtZfh())
90  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91  if (Subtarget.hasStdExtF())
92  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93  if (Subtarget.hasStdExtD())
94  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96  static const MVT::SimpleValueType BoolVecVTs[] = {
97  MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
98  MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99  static const MVT::SimpleValueType IntVecVTs[] = {
100  MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
101  MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
102  MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103  MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104  MVT::nxv4i64, MVT::nxv8i64};
105  static const MVT::SimpleValueType F16VecVTs[] = {
106  MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
107  MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108  static const MVT::SimpleValueType F32VecVTs[] = {
109  MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110  static const MVT::SimpleValueType F64VecVTs[] = {
111  MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113  if (Subtarget.hasVInstructions()) {
114  auto addRegClassForRVV = [this](MVT VT) {
115  unsigned Size = VT.getSizeInBits().getKnownMinValue();
116  assert(Size <= 512 && isPowerOf2_32(Size));
117  const TargetRegisterClass *RC;
118  if (Size <= 64)
119  RC = &RISCV::VRRegClass;
120  else if (Size == 128)
121  RC = &RISCV::VRM2RegClass;
122  else if (Size == 256)
123  RC = &RISCV::VRM4RegClass;
124  else
125  RC = &RISCV::VRM8RegClass;
126 
127  addRegisterClass(VT, RC);
128  };
129 
130  for (MVT VT : BoolVecVTs)
131  addRegClassForRVV(VT);
132  for (MVT VT : IntVecVTs) {
133  if (VT.getVectorElementType() == MVT::i64 &&
134  !Subtarget.hasVInstructionsI64())
135  continue;
136  addRegClassForRVV(VT);
137  }
138 
139  if (Subtarget.hasVInstructionsF16())
140  for (MVT VT : F16VecVTs)
141  addRegClassForRVV(VT);
142 
143  if (Subtarget.hasVInstructionsF32())
144  for (MVT VT : F32VecVTs)
145  addRegClassForRVV(VT);
146 
147  if (Subtarget.hasVInstructionsF64())
148  for (MVT VT : F64VecVTs)
149  addRegClassForRVV(VT);
150 
151  if (Subtarget.useRVVForFixedLengthVectors()) {
152  auto addRegClassForFixedVectors = [this](MVT VT) {
153  MVT ContainerVT = getContainerForFixedLengthVector(VT);
154  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156  addRegisterClass(VT, TRI.getRegClass(RCID));
157  };
158  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159  if (useRVVForFixedLengthVectorVT(VT))
160  addRegClassForFixedVectors(VT);
161 
162  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163  if (useRVVForFixedLengthVectorVT(VT))
164  addRegClassForFixedVectors(VT);
165  }
166  }
167 
168  // Compute derived properties from the register classes.
170 
172 
174  MVT::i1, Promote);
175 
176  // TODO: add all necessary setOperationAction calls.
178 
183 
185 
188 
190  if (!Subtarget.hasStdExtZbb())
192 
193  if (Subtarget.is64Bit()) {
195  MVT::i32, Custom);
196 
198  MVT::i32, Custom);
199  } else {
201  {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
202  nullptr);
203  setLibcallName(RTLIB::MULO_I64, nullptr);
204  }
205 
206  if (!Subtarget.hasStdExtM()) {
209  XLenVT, Expand);
210  } else {
211  if (Subtarget.is64Bit()) {
213 
216  } else {
218  }
219  }
220 
223  Expand);
224 
226  Custom);
227 
228  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
229  Subtarget.hasStdExtZbkb()) {
230  if (Subtarget.is64Bit())
232  } else {
234  }
235 
236  if (Subtarget.hasStdExtZbp()) {
237  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
238  // more combining.
240 
241  // BSWAP i8 doesn't exist.
243 
245 
246  if (Subtarget.is64Bit())
248  } else {
249  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
250  // pattern match it directly in isel.
252  (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
253  ? Legal
254  : Expand);
255  // Zbkb can use rev8+brev8 to implement bitreverse.
257  Subtarget.hasStdExtZbkb() ? Custom : Expand);
258  }
259 
260  if (Subtarget.hasStdExtZbb()) {
262  Legal);
263 
264  if (Subtarget.is64Bit())
267  MVT::i32, Custom);
268  } else {
270 
271  if (Subtarget.is64Bit())
273  }
274 
275  if (Subtarget.hasStdExtZbt()) {
278 
279  if (Subtarget.is64Bit())
281  } else {
283  }
284 
285  static constexpr ISD::NodeType FPLegalNodeTypes[] = {
292 
293  static const ISD::CondCode FPCCToExpand[] = {
297 
298  static const ISD::NodeType FPOpToExpand[] = {
301 
302  if (Subtarget.hasStdExtZfh())
304 
305  if (Subtarget.hasStdExtZfh()) {
306  for (auto NT : FPLegalNodeTypes)
310  setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
314 
320  MVT::f16, Promote);
321 
322  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
323  // complete support for all operations in LegalizeDAG.
324 
325  // We need to custom promote this.
326  if (Subtarget.is64Bit())
328  }
329 
330  if (Subtarget.hasStdExtF()) {
331  for (auto NT : FPLegalNodeTypes)
333  setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
337  for (auto Op : FPOpToExpand)
341  }
342 
343  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
345 
346  if (Subtarget.hasStdExtD()) {
347  for (auto NT : FPLegalNodeTypes)
351  setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
357  for (auto Op : FPOpToExpand)
361  }
362 
363  if (Subtarget.is64Bit())
366  MVT::i32, Custom);
367 
368  if (Subtarget.hasStdExtF()) {
370  Custom);
371 
374  XLenVT, Legal);
375 
378  }
379 
382  XLenVT, Custom);
383 
385 
386  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
387  // Unfortunately this can't be determined just from the ISA naming string.
389  Subtarget.is64Bit() ? Legal : Custom);
390 
393  if (Subtarget.is64Bit())
395 
396  if (Subtarget.hasStdExtA()) {
399  } else {
401  }
402 
404 
405  if (Subtarget.hasVInstructions()) {
407 
409 
410  // RVV intrinsics may have illegal operands.
411  // We also need to custom legalize vmv.x.s.
413  {MVT::i8, MVT::i16}, Custom);
414  if (Subtarget.is64Bit())
416  else
418  MVT::i64, Custom);
419 
421  MVT::Other, Custom);
422 
423  static const unsigned IntegerVPOps[] = {
424  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
425  ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
426  ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
427  ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
428  ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
429  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
430  ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
431  ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI,
432  ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
433  ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};
434 
435  static const unsigned FloatingPointVPOps[] = {
436  ISD::VP_FADD, ISD::VP_FSUB,
437  ISD::VP_FMUL, ISD::VP_FDIV,
438  ISD::VP_FNEG, ISD::VP_FMA,
439  ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
440  ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
441  ISD::VP_MERGE, ISD::VP_SELECT,
442  ISD::VP_SITOFP, ISD::VP_UITOFP,
443  ISD::VP_SETCC, ISD::VP_FP_ROUND,
444  ISD::VP_FP_EXTEND};
445 
446  if (!Subtarget.is64Bit()) {
447  // We must custom-lower certain vXi64 operations on RV32 due to the vector
448  // element type being illegal.
450  MVT::i64, Custom);
451 
456  MVT::i64, Custom);
457 
458  setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
459  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
460  ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
461  ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
462  MVT::i64, Custom);
463  }
464 
465  for (MVT VT : BoolVecVTs) {
467 
468  // Mask VTs are custom-expanded into a series of standard nodes
471  VT, Custom);
472 
474  Custom);
475 
478  {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
479  Expand);
480 
481  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
482 
485  Custom);
486 
488  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
489  Custom);
490 
491  // RVV has native int->float & float->int conversions where the
492  // element type sizes are within one power-of-two of each other. Any
493  // wider distances between type sizes have to be lowered as sequences
494  // which progressively narrow the gap in stages.
497  VT, Custom);
498 
499  // Expand all extending loads to types larger than this, and truncating
500  // stores from types larger than this.
501  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
502  setTruncStoreAction(OtherVT, VT, Expand);
504  VT, Expand);
505  }
506 
508  {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
509  Custom);
510  }
511 
512  for (MVT VT : IntVecVTs) {
513  if (VT.getVectorElementType() == MVT::i64 &&
514  !Subtarget.hasVInstructionsI64())
515  continue;
516 
519 
520  // Vectors implement MULHS/MULHU.
522 
523  // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
524  if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
526 
528  Legal);
529 
531 
533  Expand);
534 
536 
537  // Custom-lower extensions and truncations from/to mask types.
539  VT, Custom);
540 
541  // RVV has native int->float & float->int conversions where the
542  // element type sizes are within one power-of-two of each other. Any
543  // wider distances between type sizes have to be lowered as sequences
544  // which progressively narrow the gap in stages.
547  VT, Custom);
548 
551 
552  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
553  // nodes which truncate by one power of two at a time.
555 
556  // Custom-lower insert/extract operations to simplify patterns.
558  Custom);
559 
560  // Custom-lower reduction operations to set up the corresponding custom
561  // nodes' operands.
566  VT, Custom);
567 
568  setOperationAction(IntegerVPOps, VT, Custom);
569 
571 
573  VT, Custom);
574 
576  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
577  Custom);
578 
581  VT, Custom);
582 
585 
587 
588  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
589  setTruncStoreAction(VT, OtherVT, Expand);
591  VT, Expand);
592  }
593 
594  // Splice
596 
597  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
598  // type that can represent the value exactly.
599  if (VT.getVectorElementType() != MVT::i64) {
600  MVT FloatEltVT =
602  EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
603  if (isTypeLegal(FloatVT)) {
605  Custom);
606  }
607  }
608  }
609 
610  // Expand various CCs to best match the RVV ISA, which natively supports UNE
611  // but no other unordered comparisons, and supports all ordered comparisons
612  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
613  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
614  // and we pattern-match those back to the "original", swapping operands once
615  // more. This way we catch both operations and both "vf" and "fv" forms with
616  // fewer patterns.
617  static const ISD::CondCode VFPCCToExpand[] = {
621  };
622 
623  // Sets common operation actions on RVV floating-point vector types.
624  const auto SetCommonVFPActions = [&](MVT VT) {
626  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
627  // sizes are within one power-of-two of each other. Therefore conversions
628  // between vXf16 and vXf64 must be lowered as sequences which convert via
629  // vXf32.
631  // Custom-lower insert/extract operations to simplify patterns.
633  Custom);
634  // Expand various condition codes (explained above).
635  setCondCodeAction(VFPCCToExpand, VT, Expand);
636 
638 
640  VT, Custom);
641 
644  VT, Custom);
645 
647 
649 
651  VT, Custom);
652 
654  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
655  Custom);
656 
659 
662  VT, Custom);
663 
665 
666  setOperationAction(FloatingPointVPOps, VT, Custom);
667  };
668 
669  // Sets common extload/truncstore actions on RVV floating-point vector
670  // types.
671  const auto SetCommonVFPExtLoadTruncStoreActions =
672  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
673  for (auto SmallVT : SmallerVTs) {
674  setTruncStoreAction(VT, SmallVT, Expand);
675  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
676  }
677  };
678 
679  if (Subtarget.hasVInstructionsF16())
680  for (MVT VT : F16VecVTs)
681  SetCommonVFPActions(VT);
682 
683  for (MVT VT : F32VecVTs) {
684  if (Subtarget.hasVInstructionsF32())
685  SetCommonVFPActions(VT);
686  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
687  }
688 
689  for (MVT VT : F64VecVTs) {
690  if (Subtarget.hasVInstructionsF64())
691  SetCommonVFPActions(VT);
692  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
693  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
694  }
695 
696  if (Subtarget.useRVVForFixedLengthVectors()) {
697  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
698  if (!useRVVForFixedLengthVectorVT(VT))
699  continue;
700 
701  // By default everything must be expanded.
702  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
703  setOperationAction(Op, VT, Expand);
704  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
705  setTruncStoreAction(VT, OtherVT, Expand);
707  OtherVT, VT, Expand);
708  }
709 
710  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
712  Custom);
713 
715  Custom);
716 
718  VT, Custom);
719 
721 
723 
725 
727 
729 
732  Custom);
733 
735  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
736  Custom);
737 
740  VT, Custom);
741 
742  // Operations below differ between mask vectors and other vectors.
743  if (VT.getVectorElementType() == MVT::i1) {
744  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
745  ISD::OR, ISD::XOR},
746  VT, Custom);
747 
749  {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
750  VT, Custom);
751  continue;
752  }
753 
754  // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
755  // it before type legalization for i64 vectors on RV32. It will then be
756  // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
757  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
758  // improvements first.
759  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
762  }
763 
766 
769 
771  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
772  Custom);
773 
777  VT, Custom);
778 
781 
782  // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
783  if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
785 
788  Custom);
789 
792 
795 
796  // Custom-lower reduction operations to set up the corresponding custom
797  // nodes' operands.
801  VT, Custom);
802 
803  setOperationAction(IntegerVPOps, VT, Custom);
804 
805  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
806  // type that can represent the value exactly.
807  if (VT.getVectorElementType() != MVT::i64) {
808  MVT FloatEltVT =
810  EVT FloatVT =
811  MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
812  if (isTypeLegal(FloatVT))
814  Custom);
815  }
816  }
817 
818  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
819  if (!useRVVForFixedLengthVectorVT(VT))
820  continue;
821 
822  // By default everything must be expanded.
823  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
824  setOperationAction(Op, VT, Expand);
825  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
826  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
827  setTruncStoreAction(VT, OtherVT, Expand);
828  }
829 
830  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
832  Custom);
833 
837  VT, Custom);
838 
841  VT, Custom);
842 
844  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
845  Custom);
846 
850  VT, Custom);
851 
853 
855  VT, Custom);
856 
857  for (auto CC : VFPCCToExpand)
858  setCondCodeAction(CC, VT, Expand);
859 
862 
864 
867  VT, Custom);
868 
869  setOperationAction(FloatingPointVPOps, VT, Custom);
870  }
871 
872  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
874  Custom);
875  if (Subtarget.hasStdExtZfh())
877  if (Subtarget.hasStdExtF())
879  if (Subtarget.hasStdExtD())
881  }
882  }
883 
884  // Function alignments.
885  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
886  setMinFunctionAlignment(FunctionAlignment);
887  setPrefFunctionAlignment(FunctionAlignment);
888 
890 
891  // Jumps are expensive, compared to logic
893 
895  ISD::OR, ISD::XOR});
896 
897  if (Subtarget.hasStdExtF())
899 
900  if (Subtarget.hasStdExtZbp())
902 
903  if (Subtarget.hasStdExtZbb())
905 
906  if (Subtarget.hasStdExtZbkb())
908  if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
910  if (Subtarget.hasStdExtF())
913  if (Subtarget.hasVInstructions())
915  ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
917 
918  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
919  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
920 }
921 
922 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
923  LLVMContext &Context,
924  EVT VT) const {
925  if (!VT.isVector())
926  return getPointerTy(DL);
927  if (Subtarget.hasVInstructions() &&
928  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
929  return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
930  return VT.changeVectorElementTypeToInteger();
931 }
932 
933 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
934  return Subtarget.getXLenVT();
935 }
936 
937 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
938  const CallInst &I,
939  MachineFunction &MF,
940  unsigned Intrinsic) const {
941  auto &DL = I.getModule()->getDataLayout();
942  switch (Intrinsic) {
943  default:
944  return false;
945  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
946  case Intrinsic::riscv_masked_atomicrmw_add_i32:
947  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
948  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
949  case Intrinsic::riscv_masked_atomicrmw_max_i32:
950  case Intrinsic::riscv_masked_atomicrmw_min_i32:
951  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
952  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
953  case Intrinsic::riscv_masked_cmpxchg_i32:
955  Info.memVT = MVT::i32;
956  Info.ptrVal = I.getArgOperand(0);
957  Info.offset = 0;
958  Info.align = Align(4);
961  return true;
962  case Intrinsic::riscv_masked_strided_load:
964  Info.ptrVal = I.getArgOperand(1);
965  Info.memVT = getValueType(DL, I.getType()->getScalarType());
966  Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
969  return true;
970  case Intrinsic::riscv_masked_strided_store:
972  Info.ptrVal = I.getArgOperand(1);
973  Info.memVT =
974  getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
975  Info.align = Align(
976  DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
977  8);
980  return true;
981  case Intrinsic::riscv_seg2_load:
982  case Intrinsic::riscv_seg3_load:
983  case Intrinsic::riscv_seg4_load:
984  case Intrinsic::riscv_seg5_load:
985  case Intrinsic::riscv_seg6_load:
986  case Intrinsic::riscv_seg7_load:
987  case Intrinsic::riscv_seg8_load:
989  Info.ptrVal = I.getArgOperand(0);
990  Info.memVT =
991  getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
992  Info.align =
993  Align(DL.getTypeSizeInBits(
994  I.getType()->getStructElementType(0)->getScalarType()) /
995  8);
998  return true;
999  }
1000 }
1001 
1002 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1003  const AddrMode &AM, Type *Ty,
1004  unsigned AS,
1005  Instruction *I) const {
1006  // No global is ever allowed as a base.
1007  if (AM.BaseGV)
1008  return false;
1009 
1010  // RVV instructions only support register addressing.
1011  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1012  return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1013 
1014  // Require a 12-bit signed offset.
1015  if (!isInt<12>(AM.BaseOffs))
1016  return false;
1017 
1018  switch (AM.Scale) {
1019  case 0: // "r+i" or just "i", depending on HasBaseReg.
1020  break;
1021  case 1:
1022  if (!AM.HasBaseReg) // allow "r+i".
1023  break;
1024  return false; // disallow "r+r" or "r+r+i".
1025  default:
1026  return false;
1027  }
1028 
1029  return true;
1030 }
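// Illustrative note (not from the upstream file): under the rules above an
// "r+i" access such as
//   lw a0, 2047(a1)   # base register plus a signed 12-bit offset
// is a legal addressing mode, while an offset of 2048 or any scaled/"r+r"
// form is rejected and has to be materialized with separate address
// arithmetic before the memory instruction.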
1031 
1032 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1033  return isInt<12>(Imm);
1034 }
1035 
1036 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1037  return isInt<12>(Imm);
1038 }
1039 
1040 // On RV32, 64-bit integers are split into their high and low parts and held
1041 // in two different registers, so the trunc is free since the low register can
1042 // just be used.
1043 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1044  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1045  return false;
1046  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1047  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1048  return (SrcBits == 64 && DestBits == 32);
1049 }
1050 
1051 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1052  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1053  !SrcVT.isInteger() || !DstVT.isInteger())
1054  return false;
1055  unsigned SrcBits = SrcVT.getSizeInBits();
1056  unsigned DestBits = DstVT.getSizeInBits();
1057  return (SrcBits == 64 && DestBits == 32);
1058 }
1059 
1060 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1061  // Zexts are free if they can be combined with a load.
1062  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1063  // poorly with type legalization of compares preferring sext.
1064  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1065  EVT MemVT = LD->getMemoryVT();
1066  if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1067  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1068  LD->getExtensionType() == ISD::ZEXTLOAD))
1069  return true;
1070  }
1071 
1072  return TargetLowering::isZExtFree(Val, VT2);
1073 }
1074 
1075 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1076  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1077 }
1078 
1080  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1081 }
1082 
1083 bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
1084  return Subtarget.hasStdExtZbb();
1085 }
1086 
1087 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
1088  return Subtarget.hasStdExtZbb();
1089 }
1090 
1091 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1092  EVT VT = Y.getValueType();
1093 
1094  // FIXME: Support vectors once we have tests.
1095  if (VT.isVector())
1096  return false;
1097 
1098  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
1099  Subtarget.hasStdExtZbkb()) &&
1100  !isa<ConstantSDNode>(Y);
1101 }
1102 
1103 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1104  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1105  auto *C = dyn_cast<ConstantSDNode>(Y);
1106  return C && C->getAPIntValue().ule(10);
1107 }
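// Illustrative note (not from the upstream file): the bit test referred to
// above is lowered as
//   andi a0, a0, (1 << Y)
//   seqz a0, a0        # or snez for the inverted test
// ANDI only takes a signed 12-bit immediate, so (1 << Y) fits for bit
// positions Y <= 10; that is what the ule(10) check enforces.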
1108 
1109 bool RISCVTargetLowering::
1110  shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1111  SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1112  unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1113  SelectionDAG &DAG) const {
1114  // One interesting pattern that we'd want to form is 'bit extract':
1115  // ((1 >> Y) & 1) ==/!= 0
1116  // But we also need to be careful not to try to reverse that fold.
1117 
1118  // Is this '((1 >> Y) & 1)'?
1119  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1120  return false; // Keep the 'bit extract' pattern.
1121 
1122  // Will this be '((1 >> Y) & 1)' after the transform?
1123  if (NewShiftOpcode == ISD::SRL && CC->isOne())
1124  return true; // Do form the 'bit extract' pattern.
1125 
1126  // If 'X' is a constant, and we transform, then we will immediately
1127  // try to undo the fold, thus causing endless combine loop.
1128  // So only do the transform if X is not a constant. This matches the default
1129  // implementation of this function.
1130  return !XC;
1131 }
1132 
1133 /// Check if sinking \p I's operands to I's basic block is profitable, because
1134 /// the operands can be folded into a target instruction, e.g.
1135 /// splats of scalars can fold into vector instructions.
1136 bool RISCVTargetLowering::shouldSinkOperands(
1137  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1138  using namespace llvm::PatternMatch;
1139 
1140  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1141  return false;
1142 
1143  auto IsSinker = [&](Instruction *I, int Operand) {
1144  switch (I->getOpcode()) {
1145  case Instruction::Add:
1146  case Instruction::Sub:
1147  case Instruction::Mul:
1148  case Instruction::And:
1149  case Instruction::Or:
1150  case Instruction::Xor:
1151  case Instruction::FAdd:
1152  case Instruction::FSub:
1153  case Instruction::FMul:
1154  case Instruction::FDiv:
1155  case Instruction::ICmp:
1156  case Instruction::FCmp:
1157  return true;
1158  case Instruction::Shl:
1159  case Instruction::LShr:
1160  case Instruction::AShr:
1161  case Instruction::UDiv:
1162  case Instruction::SDiv:
1163  case Instruction::URem:
1164  case Instruction::SRem:
1165  return Operand == 1;
1166  case Instruction::Call:
1167  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1168  switch (II->getIntrinsicID()) {
1169  case Intrinsic::fma:
1170  case Intrinsic::vp_fma:
1171  return Operand == 0 || Operand == 1;
1172  // FIXME: Our patterns can only match vx/vf instructions when the splat
1173  // is on the RHS, because TableGen doesn't recognize our VP operations
1174  // as commutative.
1175  case Intrinsic::vp_add:
1176  case Intrinsic::vp_mul:
1177  case Intrinsic::vp_and:
1178  case Intrinsic::vp_or:
1179  case Intrinsic::vp_xor:
1180  case Intrinsic::vp_fadd:
1181  case Intrinsic::vp_fmul:
1182  case Intrinsic::vp_shl:
1183  case Intrinsic::vp_lshr:
1184  case Intrinsic::vp_ashr:
1185  case Intrinsic::vp_udiv:
1186  case Intrinsic::vp_sdiv:
1187  case Intrinsic::vp_urem:
1188  case Intrinsic::vp_srem:
1189  return Operand == 1;
1190  // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
1191  // explicit patterns for both LHS and RHS (as 'vr' versions).
1192  case Intrinsic::vp_sub:
1193  case Intrinsic::vp_fsub:
1194  case Intrinsic::vp_fdiv:
1195  return Operand == 0 || Operand == 1;
1196  default:
1197  return false;
1198  }
1199  }
1200  return false;
1201  default:
1202  return false;
1203  }
1204  };
1205 
1206  for (auto OpIdx : enumerate(I->operands())) {
1207  if (!IsSinker(I, OpIdx.index()))
1208  continue;
1209 
1210  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1211  // Make sure we are not already sinking this operand
1212  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1213  continue;
1214 
1215  // We are looking for a splat that can be sunk.
1216  if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1217  m_Undef(), m_ZeroMask())))
1218  continue;
1219 
1220  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1221  // and vector registers
1222  for (Use &U : Op->uses()) {
1223  Instruction *Insn = cast<Instruction>(U.getUser());
1224  if (!IsSinker(Insn, U.getOperandNo()))
1225  return false;
1226  }
1227 
1228  Ops.push_back(&Op->getOperandUse(0));
1229  Ops.push_back(&OpIdx.value());
1230  }
1231  return true;
1232 }
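// Illustrative example (not from the upstream file): given IR like
//   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %add   = add <4 x i32> %v, %splat
// sinking the insertelement/shufflevector pair next to the add lets isel
// keep %x in a GPR and select vadd.vx instead of first materializing the
// splat in a vector register.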
1233 
1234 bool RISCVTargetLowering::isOffsetFoldingLegal(
1235  const GlobalAddressSDNode *GA) const {
1236  // In order to maximise the opportunity for common subexpression elimination,
1237  // keep a separate ADD node for the global address offset instead of folding
1238  // it in the global address node. Later peephole optimisations may choose to
1239  // fold it back in when profitable.
1240  return false;
1241 }
1242 
1243 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1244  bool ForCodeSize) const {
1245  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1246  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1247  return false;
1248  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1249  return false;
1250  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1251  return false;
1252  return Imm.isZero();
1253 }
1254 
1255 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
1256  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1257  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1258  (VT == MVT::f64 && Subtarget.hasStdExtD());
1259 }
1260 
1261 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1262  CallingConv::ID CC,
1263  EVT VT) const {
1264  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1265  // We might still end up using a GPR but that will be decided based on ABI.
1266  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1267  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1268  return MVT::f32;
1269 
1270  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1271 }
1272 
1273 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1274  CallingConv::ID CC,
1275  EVT VT) const {
1276  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1277  // We might still end up using a GPR but that will be decided based on ABI.
1278  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1279  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1280  return 1;
1281 
1282  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1283 }
1284 
1285 // Changes the condition code and swaps operands if necessary, so the SetCC
1286 // operation matches one of the comparisons supported directly by branches
1287 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1288 // with 1/-1.
1289 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1290  ISD::CondCode &CC, SelectionDAG &DAG) {
1291  // Convert X > -1 to X >= 0.
1292  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1293  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1294  CC = ISD::SETGE;
1295  return;
1296  }
1297  // Convert X < 1 to 0 >= X.
1298  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1299  RHS = LHS;
1300  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1301  CC = ISD::SETGE;
1302  return;
1303  }
1304 
1305  switch (CC) {
1306  default:
1307  break;
1308  case ISD::SETGT:
1309  case ISD::SETLE:
1310  case ISD::SETUGT:
1311  case ISD::SETULE:
1312  CC = ISD::getSetCCSwappedOperands(CC);
1313  std::swap(LHS, RHS);
1314  break;
1315  }
1316 }
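// Illustrative example (not from the upstream file): a branch on (X > -1) is
// rewritten above to (X >= 0), which maps directly onto bgez, and a SETGT
// becomes a SETLT with swapped operands since the ISA only provides
// blt/bge-style conditional branches.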
1317 
1318 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1319  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1320  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1321  if (VT.getVectorElementType() == MVT::i1)
1322  KnownSize *= 8;
1323 
1324  switch (KnownSize) {
1325  default:
1326  llvm_unreachable("Invalid LMUL.");
1327  case 8:
1328  return RISCVII::VLMUL::LMUL_F8;
1329  case 16:
1330  return RISCVII::VLMUL::LMUL_F4;
1331  case 32:
1332  return RISCVII::VLMUL::LMUL_F2;
1333  case 64:
1334  return RISCVII::VLMUL::LMUL_1;
1335  case 128:
1336  return RISCVII::VLMUL::LMUL_2;
1337  case 256:
1338  return RISCVII::VLMUL::LMUL_4;
1339  case 512:
1340  return RISCVII::VLMUL::LMUL_8;
1341  }
1342 }
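// Illustrative trace (not from the upstream file): nxv1i8 has a known minimum
// size of 8 bits and maps to LMUL_F8, nxv2i32 (64 bits) to LMUL_1, and
// nxv16i32 (512 bits) to LMUL_8; mask types are scaled first, so nxv8i1
// (8 bits * 8) also maps to LMUL_1.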
1343 
1344 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1345  switch (LMul) {
1346  default:
1347  llvm_unreachable("Invalid LMUL.");
1348  case RISCVII::VLMUL::LMUL_F8:
1349  case RISCVII::VLMUL::LMUL_F4:
1350  case RISCVII::VLMUL::LMUL_F2:
1351  case RISCVII::VLMUL::LMUL_1:
1352  return RISCV::VRRegClassID;
1353  case RISCVII::VLMUL::LMUL_2:
1354  return RISCV::VRM2RegClassID;
1355  case RISCVII::VLMUL::LMUL_4:
1356  return RISCV::VRM4RegClassID;
1357  case RISCVII::VLMUL::LMUL_8:
1358  return RISCV::VRM8RegClassID;
1359  }
1360 }
1361 
1362 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
1363  RISCVII::VLMUL LMUL = getLMUL(VT);
1364  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1365  LMUL == RISCVII::VLMUL::LMUL_F4 ||
1366  LMUL == RISCVII::VLMUL::LMUL_F2 ||
1367  LMUL == RISCVII::VLMUL::LMUL_1) {
1368  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1369  "Unexpected subreg numbering");
1370  return RISCV::sub_vrm1_0 + Index;
1371  }
1372  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1373  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1374  "Unexpected subreg numbering");
1375  return RISCV::sub_vrm2_0 + Index;
1376  }
1377  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1378  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1379  "Unexpected subreg numbering");
1380  return RISCV::sub_vrm4_0 + Index;
1381  }
1382  llvm_unreachable("Invalid vector type.");
1383 }
1384 
1385 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
1386  if (VT.getVectorElementType() == MVT::i1)
1387  return RISCV::VRRegClassID;
1388  return getRegClassIDForLMUL(getLMUL(VT));
1389 }
1390 
1391 // Attempt to decompose a subvector insert/extract between VecVT and
1392 // SubVecVT via subregister indices. Returns the subregister index that
1393 // can perform the subvector insert/extract with the given element index, as
1394 // well as the index corresponding to any leftover subvectors that must be
1395 // further inserted/extracted within the register class for SubVecVT.
1396 std::pair<unsigned, unsigned>
1397 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1398  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1399  const RISCVRegisterInfo *TRI) {
1400  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1401  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1402  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1403  "Register classes not ordered");
1404  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1405  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1406  // Try to compose a subregister index that takes us from the incoming
1407  // LMUL>1 register class down to the outgoing one. At each step we half
1408  // the LMUL:
1409  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1410  // Note that this is not guaranteed to find a subregister index, such as
1411  // when we are extracting from one VR type to another.
1412  unsigned SubRegIdx = RISCV::NoSubRegister;
1413  for (const unsigned RCID :
1414  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1415  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1416  VecVT = VecVT.getHalfNumVectorElementsVT();
1417  bool IsHi =
1418  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1419  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1420  getSubregIndexByMVT(VecVT, IsHi));
1421  if (IsHi)
1422  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1423  }
1424  return {SubRegIdx, InsertExtractIdx};
1425 }
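// Illustrative trace of the example above (not from the upstream file): for
// VecVT = nxv16i32, SubVecVT = nxv2i32 and index 12, the loop halves the
// vector three times: nxv8i32 (index 12 is in the high half -> sub_vrm4_1,
// remaining index 4), nxv4i32 (high half -> sub_vrm2_1, remaining index 0)
// and nxv2i32 (low half -> sub_vrm1_0), composing to
// sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with a leftover index of 0.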
1426 
1427 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1428 // stores for those types.
1429 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1430  return !Subtarget.useRVVForFixedLengthVectors() ||
1431  (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
1432 }
1433 
1434 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
1435  if (ScalarTy->isPointerTy())
1436  return true;
1437 
1438  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1439  ScalarTy->isIntegerTy(32))
1440  return true;
1441 
1442  if (ScalarTy->isIntegerTy(64))
1443  return Subtarget.hasVInstructionsI64();
1444 
1445  if (ScalarTy->isHalfTy())
1446  return Subtarget.hasVInstructionsF16();
1447  if (ScalarTy->isFloatTy())
1448  return Subtarget.hasVInstructionsF32();
1449  if (ScalarTy->isDoubleTy())
1450  return Subtarget.hasVInstructionsF64();
1451 
1452  return false;
1453 }
1454 
1455 static SDValue getVLOperand(SDValue Op) {
1456  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1457  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1458  "Unexpected opcode");
1459  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1460  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1461  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1462  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1463  if (!II)
1464  return SDValue();
1465  return Op.getOperand(II->VLOperand + 1 + HasChain);
1466 }
1467 
1468 static bool useRVVForFixedLengthVectorVT(MVT VT,
1469  const RISCVSubtarget &Subtarget) {
1470  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1471  if (!Subtarget.useRVVForFixedLengthVectors())
1472  return false;
1473 
1474  // We only support a set of vector types with a consistent maximum fixed size
1475  // across all supported vector element types to avoid legalization issues.
1476  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1477  // fixed-length vector type we support is 1024 bytes.
1478  if (VT.getFixedSizeInBits() > 1024 * 8)
1479  return false;
1480 
1481  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1482 
1483  MVT EltVT = VT.getVectorElementType();
1484 
1485  // Don't use RVV for vectors we cannot scalarize if required.
1486  switch (EltVT.SimpleTy) {
1487  // i1 is supported but has different rules.
1488  default:
1489  return false;
1490  case MVT::i1:
1491  // Masks can only use a single register.
1492  if (VT.getVectorNumElements() > MinVLen)
1493  return false;
1494  MinVLen /= 8;
1495  break;
1496  case MVT::i8:
1497  case MVT::i16:
1498  case MVT::i32:
1499  break;
1500  case MVT::i64:
1501  if (!Subtarget.hasVInstructionsI64())
1502  return false;
1503  break;
1504  case MVT::f16:
1505  if (!Subtarget.hasVInstructionsF16())
1506  return false;
1507  break;
1508  case MVT::f32:
1509  if (!Subtarget.hasVInstructionsF32())
1510  return false;
1511  break;
1512  case MVT::f64:
1513  if (!Subtarget.hasVInstructionsF64())
1514  return false;
1515  break;
1516  }
1517 
1518  // Reject elements larger than ELEN.
1519  if (EltVT.getSizeInBits() > Subtarget.getELEN())
1520  return false;
1521 
1522  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1523  // Don't use RVV for types that don't fit.
1524  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1525  return false;
1526 
1527  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1528  // the base fixed length RVV support in place.
1529  if (!VT.isPow2VectorType())
1530  return false;
1531 
1532  return true;
1533 }
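// Illustrative trace (not from the upstream file), assuming a configured
// minimum VLEN of 128: v8i32 (256 bits) needs LMUL = ceil(256 / 128) = 2 and
// is accepted as long as 2 does not exceed the fixed-length LMUL limit,
// v3i32 is rejected for not being a power-of-2 vector type, and v4i64
// additionally requires hasVInstructionsI64().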
1534 
1535 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1536  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
1537 }
1538 
1539 // Return the largest legal scalable vector type that matches VT's element type.
1540 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
1541  const RISCVSubtarget &Subtarget) {
1542  // This may be called before legal types are setup.
1543  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1544  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1545  "Expected legal fixed length vector!");
1546 
1547  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1548  unsigned MaxELen = Subtarget.getELEN();
1549 
1550  MVT EltVT = VT.getVectorElementType();
1551  switch (EltVT.SimpleTy) {
1552  default:
1553  llvm_unreachable("unexpected element type for RVV container");
1554  case MVT::i1:
1555  case MVT::i8:
1556  case MVT::i16:
1557  case MVT::i32:
1558  case MVT::i64:
1559  case MVT::f16:
1560  case MVT::f32:
1561  case MVT::f64: {
1562  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1563  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1564  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
1565  unsigned NumElts =
1566  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1567  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1568  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1569  return MVT::getScalableVectorVT(EltVT, NumElts);
1570  }
1571  }
1572 }
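// Illustrative trace (not from the upstream file), assuming MinVLen = 128 and
// ELEN = 64: the VLEN-sized v4i32 maps to nxv2i32 and v16i8 to nxv8i8 (both
// LMUL=1 containers at VLEN=128), while the narrower v2i32 maps to the
// fractional container nxv1i32; NumElts is never allowed to drop below
// RVVBitsPerBlock / ELEN.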
1573 
1574 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1575  const RISCVSubtarget &Subtarget) {
1576  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1577  Subtarget);
1578 }
1579 
1580 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1581  return ::getContainerForFixedLengthVector(*this, VT, Subtarget);
1582 }
1583 
1584 // Grow V to consume an entire RVV register.
1585 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
1586  const RISCVSubtarget &Subtarget) {
1587  assert(VT.isScalableVector() &&
1588  "Expected to convert into a scalable vector!");
1589  assert(V.getValueType().isFixedLengthVector() &&
1590  "Expected a fixed length vector operand!");
1591  SDLoc DL(V);
1592  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1593  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1594 }
1595 
1596 // Shrink V so it's just big enough to maintain a VT's worth of data.
1597 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
1598  const RISCVSubtarget &Subtarget) {
1599  assert(VT.isFixedLengthVector() &&
1600  "Expected to convert into a fixed length vector!");
1601  assert(V.getValueType().isScalableVector() &&
1602  "Expected a scalable vector operand!");
1603  SDLoc DL(V);
1604  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1605  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1606 }
1607 
1608 /// Return the type of the mask type suitable for masking the provided
1609 /// vector type. This is simply an i1 element type vector of the same
1610 /// (possibly scalable) length.
1611 static MVT getMaskTypeFor(EVT VecVT) {
1612  assert(VecVT.isVector());
1613  ElementCount EC = VecVT.getVectorElementCount();
1614  return MVT::getVectorVT(MVT::i1, EC);
1615 }
1616 
1617 /// Creates an all ones mask suitable for masking a vector of type VecTy with
1618 /// vector length VL.
1619 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
1620  SelectionDAG &DAG) {
1621  MVT MaskVT = getMaskTypeFor(VecVT);
1622  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1623 }
1624 
1625 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1626 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1627 // the vector type that it is contained in.
1628 static std::pair<SDValue, SDValue>
1629 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1630  const RISCVSubtarget &Subtarget) {
1631  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1632  MVT XLenVT = Subtarget.getXLenVT();
1633  SDValue VL = VecVT.isFixedLengthVector()
1634  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1635  : DAG.getRegister(RISCV::X0, XLenVT);
1636  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1637  return {Mask, VL};
1638 }
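// Illustrative usage (not from the upstream file), assuming a fixed-length
// v4i32 lowered in an nxv2i32 container:
//   SDValue Mask, VL;
//   std::tie(Mask, VL) =
//       getDefaultVLOps(MVT::v4i32, MVT::nxv2i32, DL, DAG, Subtarget);
// VL is the constant 4 (in XLenVT) and Mask is an all-ones nxv2i1 VMSET_VL
// node; for a scalable type the VL operand is the X0 register, i.e. VLMAX.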
1639 
1640 // As above but assuming the given type is a scalable vector type.
1641 static std::pair<SDValue, SDValue>
1642 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1643  const RISCVSubtarget &Subtarget) {
1644  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1645  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1646 }
1647 
1648 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1649 // of either is (currently) supported. This can get us into an infinite loop
1650 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1651 // as a ..., etc.
1652 // Until either (or both) of these can reliably lower any node, reporting that
1653 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1654 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1655 // which is not desirable.
1656 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1657  EVT VT, unsigned DefinedValues) const {
1658  return false;
1659 }
1660 
1661 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1662  const RISCVSubtarget &Subtarget) {
1663  // RISCV FP-to-int conversions saturate to the destination register size, but
1664  // don't produce 0 for nan. We can use a conversion instruction and fix the
1665  // nan case with a compare and a select.
1666  SDValue Src = Op.getOperand(0);
1667 
1668  EVT DstVT = Op.getValueType();
1669  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1670 
1671  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1672  unsigned Opc;
1673  if (SatVT == DstVT)
1674  Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1675  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1676  Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1677  else
1678  return SDValue();
1679  // FIXME: Support other SatVTs by clamping before or after the conversion.
1680 
1681  SDLoc DL(Op);
1682  SDValue FpToInt = DAG.getNode(
1683  Opc, DL, DstVT, Src,
1684  DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1685 
1686  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1687  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1688 }
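// Illustrative result (not from the upstream file): for an f32 source and an
// i32 result this lowering typically ends up as something like
//   fcvt.w.s a0, fa0, rtz
//   feq.s    a1, fa0, fa0
//   neg      a1, a1
//   and      a0, a0, a1
// i.e. the RTZ conversion already saturates on out-of-range inputs and the
// ordered self-compare masks the result to 0 when the input is NaN.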
1689 
1690 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1691 // and back. Taking care to avoid converting values that are nan or already
1692 // correct.
1693 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1694 // have FRM dependencies modeled yet.
1695 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
1696  MVT VT = Op.getSimpleValueType();
1697  assert(VT.isVector() && "Unexpected type");
1698 
1699  SDLoc DL(Op);
1700 
1701  // Freeze the source since we are increasing the number of uses.
1702  SDValue Src = DAG.getFreeze(Op.getOperand(0));
1703 
1704  // Truncate to integer and convert back to FP.
1705  MVT IntVT = VT.changeVectorElementTypeToInteger();
1706  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
1707  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1708 
1710 
1711  if (Op.getOpcode() == ISD::FCEIL) {
1712  // If the truncated value is the greater than or equal to the original
1713  // value, we've computed the ceil. Otherwise, we went the wrong way and
1714  // need to increase by 1.
1715  // FIXME: This should use a masked operation. Handle here or in isel?
1716  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
1717  DAG.getConstantFP(1.0, DL, VT));
1718  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
1719  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1720  } else if (Op.getOpcode() == ISD::FFLOOR) {
1721  // If the truncated value is the less than or equal to the original value,
1722  // we've computed the floor. Otherwise, we went the wrong way and need to
1723  // decrease by 1.
1724  // FIXME: This should use a masked operation. Handle here or in isel?
1725  SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
1726  DAG.getConstantFP(1.0, DL, VT));
1727  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
1728  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1729  }
1730 
1731  // Restore the original sign so that -0.0 is preserved.
1732  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1733 
1734  // Determine the largest integer that can be represented exactly. This and
1735  // values larger than it don't have any fractional bits so don't need to
1736  // be converted.
1737  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1738  unsigned Precision = APFloat::semanticsPrecision(FltSem);
1739  APFloat MaxVal = APFloat(FltSem);
1740  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1741  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1742  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1743 
1744  // If abs(Src) was larger than MaxVal or nan, keep it.
1745  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1746  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1747  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1748 }
1749 
1750 // ISD::FROUND is defined to round to nearest with ties rounding away from 0.
1751 // This mode isn't supported in vector hardware on RISCV. But as long as we
1752 // aren't compiling with trapping math, we can emulate this with
1753 // floor(X + copysign(nextafter(0.5, 0.0), X)).
1754 // FIXME: Could be shorter by changing rounding mode, but we don't have FRM
1755 // dependencies modeled yet.
1756 // FIXME: Use masked operations to avoid final merge.
1757 static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
1758  MVT VT = Op.getSimpleValueType();
1759  assert(VT.isVector() && "Unexpected type");
1760 
1761  SDLoc DL(Op);
1762 
1763  // Freeze the source since we are increasing the number of uses.
1764  SDValue Src = DAG.getFreeze(Op.getOperand(0));
1765 
1766  // We do the conversion on the absolute value and fix the sign at the end.
1767  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1768 
1769  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1770  bool Ignored;
1771  APFloat Point5Pred = APFloat(0.5f);
1772  Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
1773  Point5Pred.next(/*nextDown*/ true);
1774 
1775  // Add the adjustment.
1776  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
1777  DAG.getConstantFP(Point5Pred, DL, VT));
1778 
1779  // Truncate to integer and convert back to fp.
1780  MVT IntVT = VT.changeVectorElementTypeToInteger();
1781  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
1782  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1783 
1784  // Restore the original sign.
1785  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1786 
1787  // Determine the largest integer that can be represented exactly. This and
1788  // values larger than it don't have any fractional bits so don't need to
1789  // be converted.
1790  unsigned Precision = APFloat::semanticsPrecision(FltSem);
1791  APFloat MaxVal = APFloat(FltSem);
1792  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1793  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1794  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1795 
1796  // If abs(Src) was larger than MaxVal or nan, keep it.
1797  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1798  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1799  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1800 }
1801 
1802 struct VIDSequence {
1803  int64_t StepNumerator;
1804  unsigned StepDenominator;
1805  int64_t Addend;
1806 };
1807 
1808 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1809 // to the (non-zero) step S and start value X. This can be then lowered as the
1810 // RVV sequence (VID * S) + X, for example.
1811 // The step S is represented as an integer numerator divided by a positive
1812 // denominator. Note that the implementation currently only identifies
1813 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1814 // cannot detect 2/3, for example.
1815 // Note that this method will also match potentially unappealing index
1816 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
1817 // determine whether this is worth generating code for.
1818 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
1819  unsigned NumElts = Op.getNumOperands();
1820  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1821  if (!Op.getValueType().isInteger())
1822  return None;
1823 
1824  Optional<unsigned> SeqStepDenom;
1825  Optional<int64_t> SeqStepNum, SeqAddend;
1826  Optional<std::pair<uint64_t, unsigned>> PrevElt;
1827  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1828  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1829  // Assume undef elements match the sequence; we just have to be careful
1830  // when interpolating across them.
1831  if (Op.getOperand(Idx).isUndef())
1832  continue;
1833  // The BUILD_VECTOR must be all constants.
1834  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1835  return None;
1836 
1837  uint64_t Val = Op.getConstantOperandVal(Idx) &
1838  maskTrailingOnes<uint64_t>(EltSizeInBits);
1839 
1840  if (PrevElt) {
1841  // Calculate the step since the last non-undef element, and ensure
1842  // it's consistent across the entire sequence.
1843  unsigned IdxDiff = Idx - PrevElt->second;
1844  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1845 
1846  // A zero value difference means that we're somewhere in the middle
1847  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1848  // step change before evaluating the sequence.
1849  if (ValDiff == 0)
1850  continue;
1851 
1852  int64_t Remainder = ValDiff % IdxDiff;
1853  // Normalize the step if it's greater than 1.
1854  if (Remainder != ValDiff) {
1855  // The difference must cleanly divide the element span.
1856  if (Remainder != 0)
1857  return None;
1858  ValDiff /= IdxDiff;
1859  IdxDiff = 1;
1860  }
1861 
1862  if (!SeqStepNum)
1863  SeqStepNum = ValDiff;
1864  else if (ValDiff != SeqStepNum)
1865  return None;
1866 
1867  if (!SeqStepDenom)
1868  SeqStepDenom = IdxDiff;
1869  else if (IdxDiff != *SeqStepDenom)
1870  return None;
1871  }
1872 
1873  // Record this non-undef element for later.
1874  if (!PrevElt || PrevElt->first != Val)
1875  PrevElt = std::make_pair(Val, Idx);
1876  }
1877 
1878  // We need to have logged a step for this to count as a legal index sequence.
1879  if (!SeqStepNum || !SeqStepDenom)
1880  return None;
1881 
1882  // Loop back through the sequence and validate elements we might have skipped
1883  // while waiting for a valid step. While doing this, log any sequence addend.
1884  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1885  if (Op.getOperand(Idx).isUndef())
1886  continue;
1887  uint64_t Val = Op.getConstantOperandVal(Idx) &
1888  maskTrailingOnes<uint64_t>(EltSizeInBits);
1889  uint64_t ExpectedVal =
1890  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1891  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1892  if (!SeqAddend)
1893  SeqAddend = Addend;
1894  else if (Addend != SeqAddend)
1895  return None;
1896  }
1897 
1898  assert(SeqAddend && "Must have an addend if we have a step");
1899 
1900  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1901 }
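// Illustrative examples (not from the upstream file):
//   <0, 1, 2, 3> -> { StepNumerator = 1, StepDenominator = 1, Addend = 0 }
//   <1, 3, 5, 7> -> { StepNumerator = 2, StepDenominator = 1, Addend = 1 }
//   <0, 0, 1, 1> -> { StepNumerator = 1, StepDenominator = 2, Addend = 0 }
// whereas <0, 2, 3> has no consistent step and yields None.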
1902 
1903 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
1904 // and lower it as a VRGATHER_VX_VL from the source vector.
1905 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
1906  SelectionDAG &DAG,
1907  const RISCVSubtarget &Subtarget) {
1908  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1909  return SDValue();
1910  SDValue Vec = SplatVal.getOperand(0);
1911  // Only perform this optimization on vectors of the same size for simplicity.
1912  if (Vec.getValueType() != VT)
1913  return SDValue();
1914  SDValue Idx = SplatVal.getOperand(1);
1915  // The index must be a legal type.
1916  if (Idx.getValueType() != Subtarget.getXLenVT())
1917  return SDValue();
1918 
1919  MVT ContainerVT = VT;
1920  if (VT.isFixedLengthVector()) {
1921  ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1922  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
1923  }
1924 
1925  SDValue Mask, VL;
1926  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1927 
1928  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
1929  Idx, Mask, VL);
1930 
1931  if (!VT.isFixedLengthVector())
1932  return Gather;
1933 
1934  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1935 }
1936 
1937 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
1938  const RISCVSubtarget &Subtarget) {
1939  MVT VT = Op.getSimpleValueType();
1940  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1941 
1942  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1943 
1944  SDLoc DL(Op);
1945  SDValue Mask, VL;
1946  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1947 
1948  MVT XLenVT = Subtarget.getXLenVT();
1949  unsigned NumElts = Op.getNumOperands();
1950 
1951  if (VT.getVectorElementType() == MVT::i1) {
1952  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1953  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1954  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1955  }
1956 
1957  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1958  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1959  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1960  }
1961 
1962  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1963  // scalar integer chunks whose bit-width depends on the number of mask
1964  // bits and XLEN.
1965  // First, determine the most appropriate scalar integer type to use. This
1966  // is at most XLenVT, but may be shrunk to a smaller vector element type
1967  // according to the size of the final vector - use i8 chunks rather than
1968  // XLenVT if we're producing a v8i1. This results in more consistent
1969  // codegen across RV32 and RV64.
1970  unsigned NumViaIntegerBits =
1971  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1972  NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
1973  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1974  // If we have to use more than one INSERT_VECTOR_ELT then this
1975  // optimization is likely to increase code size; avoid performing it in
1976  // such a case. We can use a load from a constant pool in this case.
1977  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1978  return SDValue();
1979  // Now we can create our integer vector type. Note that it may be larger
1980  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1981  MVT IntegerViaVecVT =
1982  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1983  divideCeil(NumElts, NumViaIntegerBits));
1984 
1985  uint64_t Bits = 0;
1986  unsigned BitPos = 0, IntegerEltIdx = 0;
1987  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1988 
1989  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1990  // Once we accumulate enough bits to fill our scalar type, insert into
1991  // our vector and clear our accumulated data.
1992  if (I != 0 && I % NumViaIntegerBits == 0) {
1993  if (NumViaIntegerBits <= 32)
1994  Bits = SignExtend64<32>(Bits);
1995  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1996  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1997  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1998  Bits = 0;
1999  BitPos = 0;
2000  IntegerEltIdx++;
2001  }
2002  SDValue V = Op.getOperand(I);
2003  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2004  Bits |= ((uint64_t)BitValue << BitPos);
2005  }
2006 
2007  // Insert the (remaining) scalar value into position in our integer
2008  // vector type.
2009  if (NumViaIntegerBits <= 32)
2010  Bits = SignExtend64<32>(Bits);
2011  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2012  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2013  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2014 
2015  if (NumElts < NumViaIntegerBits) {
2016  // If we're producing a smaller vector than our minimum legal integer
2017  // type, bitcast to the equivalent (known-legal) mask type, and extract
2018  // our final mask.
2019  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2020  Vec = DAG.getBitcast(MVT::v8i1, Vec);
2021  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2022  DAG.getConstant(0, DL, XLenVT));
2023  } else {
2024  // Else we must have produced an integer type with the same size as the
2025  // mask type; bitcast for the final result.
2026  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2027  Vec = DAG.getBitcast(VT, Vec);
2028  }
2029 
2030  return Vec;
2031  }
2032 
2033  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2034  // vector type, we have a legal equivalently-sized i8 type, so we can use
2035  // that.
2036  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2037  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2038 
2039  SDValue WideVec;
2040  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2041  // For a splat, perform a scalar truncate before creating the wider
2042  // vector.
2043  assert(Splat.getValueType() == XLenVT &&
2044  "Unexpected type for i1 splat value");
2045  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2046  DAG.getConstant(1, DL, XLenVT));
2047  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2048  } else {
2049  SmallVector<SDValue, 8> Ops(Op->op_values());
2050  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2051  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2052  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2053  }
2054 
2055  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2056  }
2057 
2058  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2059  if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2060  return Gather;
2061  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2062  : RISCVISD::VMV_V_X_VL;
2063  Splat =
2064  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2065  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2066  }
2067 
2068  // Try and match index sequences, which we can lower to the vid instruction
2069  // with optional modifications. An all-undef vector is matched by
2070  // getSplatValue, above.
2071  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2072  int64_t StepNumerator = SimpleVID->StepNumerator;
2073  unsigned StepDenominator = SimpleVID->StepDenominator;
2074  int64_t Addend = SimpleVID->Addend;
2075 
2076  assert(StepNumerator != 0 && "Invalid step");
2077  bool Negate = false;
2078  int64_t SplatStepVal = StepNumerator;
2079  unsigned StepOpcode = ISD::MUL;
2080  if (StepNumerator != 1) {
2081  if (isPowerOf2_64(std::abs(StepNumerator))) {
2082  Negate = StepNumerator < 0;
2083  StepOpcode = ISD::SHL;
2084  SplatStepVal = Log2_64(std::abs(StepNumerator));
2085  }
2086  }
2087 
2088  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2089  // threshold since it's the immediate value many RVV instructions accept.
2090  // There is no vmul.vi instruction so ensure multiply constant can fit in
2091  // a single addi instruction.
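 // For example, the sequence <0, 3, 6, 9> (step 3, addend 0) becomes a vid.v
 // followed by a vector multiply by 3, while <0, 4, 8, 12> uses a left shift
 // by 2 instead because its step is a power of two.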
2092  if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2093  (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2094  isPowerOf2_32(StepDenominator) &&
2095  (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2096  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2097  // Convert right out of the scalable type so we can use standard ISD
2098  // nodes for the rest of the computation. If we used scalable types with
2099  // these, we'd lose the fixed-length vector info and generate worse
2100  // vsetvli code.
2101  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2102  if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2103  (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2104  SDValue SplatStep = DAG.getSplatBuildVector(
2105  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2106  VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2107  }
2108  if (StepDenominator != 1) {
2109  SDValue SplatStep = DAG.getSplatBuildVector(
2110  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2111  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2112  }
2113  if (Addend != 0 || Negate) {
2114  SDValue SplatAddend = DAG.getSplatBuildVector(
2115  VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2116  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2117  }
2118  return VID;
2119  }
2120  }
2121 
2122  // Attempt to detect "hidden" splats, which only reveal themselves as splats
2123  // when re-interpreted as a vector with a larger element type. For example,
2124  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2125  // could be instead splat as
2126  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2127  // TODO: This optimization could also work on non-constant splats, but it
2128  // would require bit-manipulation instructions to construct the splat value.
2129  SmallVector<SDValue> Sequence;
2130  unsigned EltBitSize = VT.getScalarSizeInBits();
2131  const auto *BV = cast<BuildVectorSDNode>(Op);
2132  if (VT.isInteger() && EltBitSize < 64 &&
2133  ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2134  BV->getRepeatedSequence(Sequence) &&
2135  (Sequence.size() * EltBitSize) <= 64) {
2136  unsigned SeqLen = Sequence.size();
2137  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2138  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2139  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2140  ViaIntVT == MVT::i64) &&
2141  "Unexpected sequence type");
2142 
2143  unsigned EltIdx = 0;
2144  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2145  uint64_t SplatValue = 0;
2146  // Construct the amalgamated value which can be splatted as this larger
2147  // vector type.
2148  for (const auto &SeqV : Sequence) {
2149  if (!SeqV.isUndef())
2150  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2151  << (EltIdx * EltBitSize));
2152  EltIdx++;
2153  }
2154 
2155  // On RV64, sign-extend from 32 to 64 bits where possible in order to
2156  // achieve better constant materialization.
2157  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2158  SplatValue = SignExtend64<32>(SplatValue);
2159 
2160  // Since we can't introduce illegal i64 types at this stage, we can only
2161  // perform an i64 splat on RV32 if it is its own sign-extended value. That
2162  // way we can use RVV instructions to splat.
2163  assert((ViaIntVT.bitsLE(XLenVT) ||
2164  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2165  "Unexpected bitcast sequence");
2166  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2167  SDValue ViaVL =
2168  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2169  MVT ViaContainerVT =
2170  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2171  SDValue Splat =
2172  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2173  DAG.getUNDEF(ViaContainerVT),
2174  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2175  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2176  return DAG.getBitcast(VT, Splat);
2177  }
2178  }
2179 
2180  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2181  // which constitute a large proportion of the elements. In such cases we can
2182  // splat a vector with the dominant element and make up the shortfall with
2183  // INSERT_VECTOR_ELTs.
2184  // Note that this includes vectors of 2 elements by association. The
2185  // upper-most element is the "dominant" one, allowing us to use a splat to
2186  // "insert" the upper element, and an insert of the lower element at position
2187  // 0, which improves codegen.
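 // For example, with default (non-minsize) thresholds a v4i32 build_vector of
 // <42, 42, 7, 42> is lowered as a splat of 42 plus a single insert of 7 at
 // index 2: 42 appears 3 times, which exceeds the NumDefElts - 2 = 2 threshold.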
2188  SDValue DominantValue;
2189  unsigned MostCommonCount = 0;
2190  DenseMap<SDValue, unsigned> ValueCounts;
2191  unsigned NumUndefElts =
2192  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2193 
2194  // Track the number of scalar loads we know we'd be inserting, estimated as
2195  // any non-zero floating-point constant. Other kinds of element are either
2196  // already in registers or are materialized on demand. The threshold at which
2197  // a vector load is more desirable than several scalar materialization and
2198  // vector-insertion instructions is not known.
2199  unsigned NumScalarLoads = 0;
2200 
2201  for (SDValue V : Op->op_values()) {
2202  if (V.isUndef())
2203  continue;
2204 
2205  ValueCounts.insert(std::make_pair(V, 0));
2206  unsigned &Count = ValueCounts[V];
2207 
2208  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2209  NumScalarLoads += !CFP->isExactlyValue(+0.0);
2210 
2211  // Is this value dominant? In case of a tie, prefer the highest element as
2212  // it's cheaper to insert near the beginning of a vector than it is at the
2213  // end.
2214  if (++Count >= MostCommonCount) {
2215  DominantValue = V;
2216  MostCommonCount = Count;
2217  }
2218  }
2219 
2220  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2221  unsigned NumDefElts = NumElts - NumUndefElts;
2222  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2223 
2224  // Don't perform this optimization when optimizing for size, since
2225  // materializing elements and inserting them tends to cause code bloat.
2226  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2227  ((MostCommonCount > DominantValueCountThreshold) ||
2228  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2229  // Start by splatting the most common element.
2230  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2231 
2232  DenseSet<SDValue> Processed{DominantValue};
2233  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2234  for (const auto &OpIdx : enumerate(Op->ops())) {
2235  const SDValue &V = OpIdx.value();
2236  if (V.isUndef() || !Processed.insert(V).second)
2237  continue;
2238  if (ValueCounts[V] == 1) {
2239  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2240  DAG.getConstant(OpIdx.index(), DL, XLenVT));
2241  } else {
2242  // Blend in all instances of this value using a VSELECT, using a
2243  // mask where each bit signals whether that element is the one
2244  // we're after.
2245  SmallVector<SDValue> Ops;
2246  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2247  return DAG.getConstant(V == V1, DL, XLenVT);
2248  });
2249  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2250  DAG.getBuildVector(SelMaskTy, DL, Ops),
2251  DAG.getSplatBuildVector(VT, DL, V), Vec);
2252  }
2253  }
2254 
2255  return Vec;
2256  }
2257 
2258  return SDValue();
2259 }
2260 
2261 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2262  SDValue Lo, SDValue Hi, SDValue VL,
2263  SelectionDAG &DAG) {
2264  if (!Passthru)
2265  Passthru = DAG.getUNDEF(VT);
2266  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2267  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2268  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2269  // If Hi constant is all the same sign bit as Lo, lower this as a custom
2270  // node in order to try and match RVV vector/scalar instructions.
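 // e.g. splatting the i64 constant -1 gives LoC = HiC = -1, so
 // (LoC >> 31) == HiC and a single vmv.v.x of Lo suffices; the same holds for
 // any i64 value that is the sign extension of its low 32 bits.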
2271  if ((LoC >> 31) == HiC)
2272  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2273 
2274  // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
2275  // vmv.v.x whose EEW = 32 to lower it.
2276  auto *Const = dyn_cast<ConstantSDNode>(VL);
2277  if (LoC == HiC && Const && Const->isAllOnesValue()) {
2278  MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2279  // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2280  // access the subtarget here now.
2281  auto InterVec = DAG.getNode(
2282  RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2283  DAG.getRegister(RISCV::X0, MVT::i32));
2284  return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2285  }
2286  }
2287 
2288  // Fall back to a stack store and stride x0 vector load.
2289  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2290  Hi, VL);
2291 }
2292 
2293 // Called by type legalization to handle splat of i64 on RV32.
2294 // FIXME: We can optimize this when the type has sign or zero bits in one
2295 // of the halves.
2296 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2297  SDValue Scalar, SDValue VL,
2298  SelectionDAG &DAG) {
2299  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2300  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2301  DAG.getConstant(0, DL, MVT::i32));
2302  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2303  DAG.getConstant(1, DL, MVT::i32));
2304  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2305 }
2306 
2307 // This function lowers a splat of the scalar operand Scalar with the vector
2308 // length VL. It ensures the final sequence is type legal, which is useful when
2309 // lowering a splat after type legalization.
2310 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2311  MVT VT, SDLoc DL, SelectionDAG &DAG,
2312  const RISCVSubtarget &Subtarget) {
2313  bool HasPassthru = Passthru && !Passthru.isUndef();
2314  if (!HasPassthru && !Passthru)
2315  Passthru = DAG.getUNDEF(VT);
2316  if (VT.isFloatingPoint()) {
2317  // If VL is 1, we could use vfmv.s.f.
2318  if (isOneConstant(VL))
2319  return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2320  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2321  }
2322 
2323  MVT XLenVT = Subtarget.getXLenVT();
2324 
2325  // Simplest case is that the operand needs to be promoted to XLenVT.
2326  if (Scalar.getValueType().bitsLE(XLenVT)) {
2327  // If the operand is a constant, sign extend to increase our chances
2328  // of being able to use a .vi instruction. ANY_EXTEND would become a
2329  // zero extend and the simm5 check in isel would fail.
2330  // FIXME: Should we ignore the upper bits in isel instead?
2331  unsigned ExtOpc =
2332  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2333  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2334  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2335  // If VL is 1 and the scalar value won't benefit from immediate, we could
2336  // use vmv.s.x.
2337  if (isOneConstant(VL) &&
2338  (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2339  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2340  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2341  }
2342 
2343  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2344  "Unexpected scalar for splat lowering!");
2345 
2346  if (isOneConstant(VL) && isNullConstant(Scalar))
2347  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2348  DAG.getConstant(0, DL, XLenVT), VL);
2349 
2350  // Otherwise use the more complicated splatting algorithm.
2351  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2352 }
2353 
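 // Match shuffles that interleave the low halves of two source vectors, e.g.
 // for a v8i16 result the mask <0, 8, 1, 9, 2, 10, 3, 11> takes elements
 // alternately from the low half of each source. SwapSources is set when the
 // even destination elements come from the second source.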
2354 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2355  const RISCVSubtarget &Subtarget) {
2356  // We need to be able to widen elements to the next larger integer type.
2357  if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2358  return false;
2359 
2360  int Size = Mask.size();
2361  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2362 
2363  int Srcs[] = {-1, -1};
2364  for (int i = 0; i != Size; ++i) {
2365  // Ignore undef elements.
2366  if (Mask[i] < 0)
2367  continue;
2368 
2369  // Is this an even or odd element.
2370  int Pol = i % 2;
2371 
2372  // Ensure we consistently use the same source for this element polarity.
2373  int Src = Mask[i] / Size;
2374  if (Srcs[Pol] < 0)
2375  Srcs[Pol] = Src;
2376  if (Srcs[Pol] != Src)
2377  return false;
2378 
2379  // Make sure the element within the source is appropriate for this element
2380  // in the destination.
2381  int Elt = Mask[i] % Size;
2382  if (Elt != i / 2)
2383  return false;
2384  }
2385 
2386  // We need to find a source for each polarity and they can't be the same.
2387  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2388  return false;
2389 
2390  // Swap the sources if the second source was in the even polarity.
2391  SwapSources = Srcs[0] > Srcs[1];
2392 
2393  return true;
2394 }
2395 
2396 /// Match shuffles that concatenate two vectors, rotate the concatenation,
2397 /// and then extract the original number of elements from the rotated result.
2398 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
2399 /// returned rotation amount is for a rotate right, where elements move from
2400 /// higher elements to lower elements. \p LoSrc indicates the first source
2401 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
2402 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
2403 /// 0 or 1 if a rotation is found.
2404 ///
2405 /// NOTE: We talk about rotate to the right which matches how bit shift and
2406 /// rotate instructions are described where LSBs are on the right, but LLVM IR
2407 /// and the table below write vectors with the lowest elements on the left.
2408 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
2409  int Size = Mask.size();
2410 
2411  // We need to detect various ways of spelling a rotation:
2412  // [11, 12, 13, 14, 15, 0, 1, 2]
2413  // [-1, 12, 13, 14, -1, -1, 1, -1]
2414  // [-1, -1, -1, -1, -1, -1, 1, 2]
2415  // [ 3, 4, 5, 6, 7, 8, 9, 10]
2416  // [-1, 4, 5, 6, -1, -1, 9, -1]
2417  // [-1, 4, 5, 6, -1, -1, -1, -1]
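 // All six masks above (Size == 8) describe a rotation of 3: in the first
 // three LoSrc is 0 and HiSrc is 1 (or -1 where that portion is entirely
 // undef), and in the last three LoSrc is 1 and HiSrc is 0 (again -1 where
 // undef).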
2418  int Rotation = 0;
2419  LoSrc = -1;
2420  HiSrc = -1;
2421  for (int i = 0; i != Size; ++i) {
2422  int M = Mask[i];
2423  if (M < 0)
2424  continue;
2425 
2426  // Determine where a rotate vector would have started.
2427  int StartIdx = i - (M % Size);
2428  // The identity rotation isn't interesting, stop.
2429  if (StartIdx == 0)
2430  return -1;
2431 
2432  // If we found the tail of a vector the rotation must be the missing
2433  // front. If we found the head of a vector, it must be how much of the
2434  // head.
2435  int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
2436 
2437  if (Rotation == 0)
2438  Rotation = CandidateRotation;
2439  else if (Rotation != CandidateRotation)
2440  // The rotations don't match, so we can't match this mask.
2441  return -1;
2442 
2443  // Compute which value this mask is pointing at.
2444  int MaskSrc = M < Size ? 0 : 1;
2445 
2446  // Compute which of the two target values this index should be assigned to.
2447  // This reflects whether the high elements are remaining or the low elements
2448  // are remaining.
2449  int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2450 
2451  // Either set up this value if we've not encountered it before, or check
2452  // that it remains consistent.
2453  if (TargetSrc < 0)
2454  TargetSrc = MaskSrc;
2455  else if (TargetSrc != MaskSrc)
2456  // This may be a rotation, but it pulls from the inputs in some
2457  // unsupported interleaving.
2458  return -1;
2459  }
2460 
2461  // Check that we successfully analyzed the mask, and normalize the results.
2462  assert(Rotation != 0 && "Failed to locate a viable rotation!");
2463  assert((LoSrc >= 0 || HiSrc >= 0) &&
2464  "Failed to find a rotated input vector!");
2465 
2466  return Rotation;
2467 }
2468 
2469 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2470  const RISCVSubtarget &Subtarget) {
2471  SDValue V1 = Op.getOperand(0);
2472  SDValue V2 = Op.getOperand(1);
2473  SDLoc DL(Op);
2474  MVT XLenVT = Subtarget.getXLenVT();
2475  MVT VT = Op.getSimpleValueType();
2476  unsigned NumElts = VT.getVectorNumElements();
2477  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2478 
2479  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2480 
2481  SDValue TrueMask, VL;
2482  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2483 
2484  if (SVN->isSplat()) {
2485  const int Lane = SVN->getSplatIndex();
2486  if (Lane >= 0) {
2487  MVT SVT = VT.getVectorElementType();
2488 
2489  // Turn splatted vector load into a strided load with an X0 stride.
2490  SDValue V = V1;
2491  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2492  // with undef.
2493  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2494  int Offset = Lane;
2495  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2496  int OpElements =
2497  V.getOperand(0).getSimpleValueType().getVectorNumElements();
2498  V = V.getOperand(Offset / OpElements);
2499  Offset %= OpElements;
2500  }
2501 
2502  // We need to ensure the load isn't atomic or volatile.
2503  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2504  auto *Ld = cast<LoadSDNode>(V);
2505  Offset *= SVT.getStoreSize();
2506  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2507  TypeSize::Fixed(Offset), DL);
2508 
2509  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2510  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2511  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2512  SDValue IntID =
2513  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2514  SDValue Ops[] = {Ld->getChain(),
2515  IntID,
2516  DAG.getUNDEF(ContainerVT),
2517  NewAddr,
2518  DAG.getRegister(RISCV::X0, XLenVT),
2519  VL};
2520  SDValue NewLoad = DAG.getMemIntrinsicNode(
2521  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2522  DAG.getMachineFunction().getMachineMemOperand(
2523  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2524  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2525  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2526  }
2527 
2528  // Otherwise use a scalar load and splat. This will give the best
2529  // opportunity to fold a splat into the operation. ISel can turn it into
2530  // the x0 strided load if we aren't able to fold away the select.
2531  if (SVT.isFloatingPoint())
2532  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2533  Ld->getPointerInfo().getWithOffset(Offset),
2534  Ld->getOriginalAlign(),
2535  Ld->getMemOperand()->getFlags());
2536  else
2537  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2538  Ld->getPointerInfo().getWithOffset(Offset), SVT,
2539  Ld->getOriginalAlign(),
2540  Ld->getMemOperand()->getFlags());
2541  DAG.makeEquivalentMemoryOrdering(Ld, V);
2542 
2543  unsigned Opc =
2544  VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2545  SDValue Splat =
2546  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
2547  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2548  }
2549 
2550  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2551  assert(Lane < (int)NumElts && "Unexpected lane!");
2552  SDValue Gather =
2553  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2554  DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2555  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2556  }
2557  }
2558 
2559  ArrayRef<int> Mask = SVN->getMask();
2560 
2561  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
2562  // be undef which can be handled with a single SLIDEDOWN/UP.
2563  int LoSrc, HiSrc;
2564  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
2565  if (Rotation > 0) {
2566  SDValue LoV, HiV;
2567  if (LoSrc >= 0) {
2568  LoV = LoSrc == 0 ? V1 : V2;
2569  LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
2570  }
2571  if (HiSrc >= 0) {
2572  HiV = HiSrc == 0 ? V1 : V2;
2573  HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
2574  }
2575 
2576  // We found a rotation. We need to slide HiV down by Rotation. Then we need
2577  // to slide LoV up by (NumElts - Rotation).
2578  unsigned InvRotate = NumElts - Rotation;
2579 
2580  SDValue Res = DAG.getUNDEF(ContainerVT);
2581  if (HiV) {
2582  // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
2583  // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
2584  // causes multiple vsetvlis in some test cases such as lowering
2585  // reduce.mul
2586  SDValue DownVL = VL;
2587  if (LoV)
2588  DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
2589  Res =
2590  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
2591  DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
2592  }
2593  if (LoV)
2594  Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
2595  DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
2596 
2597  return convertFromScalableVector(VT, Res, DAG, Subtarget);
2598  }
2599 
2600  // Detect an interleave shuffle and lower to
2601  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
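 // This works because the vwaddu computes zext(V1) + zext(V2) in the doubled
 // element width, and adding V2 * (2^eltbits - 1) on top (the vwmulu/add pair
 // below, later combined into vwmaccu) gives zext(V1) + zext(V2) * 2^eltbits.
 // Each widened element then holds a V1 element in its low half and the
 // corresponding V2 element in its high half, which is the interleaved layout
 // when viewed at the original element width.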
2602  bool SwapSources;
2603  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2604  // Swap sources if needed.
2605  if (SwapSources)
2606  std::swap(V1, V2);
2607 
2608  // Extract the lower half of the vectors.
2609  MVT HalfVT = VT.getHalfNumVectorElementsVT();
2610  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2611  DAG.getConstant(0, DL, XLenVT));
2612  V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2613  DAG.getConstant(0, DL, XLenVT));
2614 
2615  // Double the element width and halve the number of elements in an int type.
2616  unsigned EltBits = VT.getScalarSizeInBits();
2617  MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2618  MVT WideIntVT =
2619  MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2620  // Convert this to a scalable vector. We need to base this on the
2621  // destination size to ensure there's always a type with a smaller LMUL.
2622  MVT WideIntContainerVT =
2623  getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2624 
2625  // Convert sources to scalable vectors with the same element count as the
2626  // larger type.
2627  MVT HalfContainerVT = MVT::getVectorVT(
2628  VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2629  V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2630  V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2631 
2632  // Cast sources to integer.
2633  MVT IntEltVT = MVT::getIntegerVT(EltBits);
2634  MVT IntHalfVT =
2635  MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2636  V1 = DAG.getBitcast(IntHalfVT, V1);
2637  V2 = DAG.getBitcast(IntHalfVT, V2);
2638 
2639  // Freeze V2 since we use it twice and we need to be sure that the add and
2640  // multiply see the same value.
2641  V2 = DAG.getFreeze(V2);
2642 
2643  // Recreate TrueMask using the widened type's element count.
2644  TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
2645 
2646  // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2647  SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
2648  V2, TrueMask, VL);
2649  // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2650  SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2651  DAG.getUNDEF(IntHalfVT),
2652  DAG.getAllOnesConstant(DL, XLenVT));
2653  SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
2654  V2, Multiplier, TrueMask, VL);
2655  // Add the new copies to our previous addition giving us 2^eltbits copies of
2656  // V2. This is equivalent to shifting V2 left by eltbits. This should
2657  // combine with the vwmulu.vv above to form vwmaccu.vv.
2658  Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2659  TrueMask, VL);
2660  // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
2661  // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
2662  // vector VT.
2663  ContainerVT =
2664  MVT::getVectorVT(VT.getVectorElementType(),
2665  WideIntContainerVT.getVectorElementCount() * 2);
2666  Add = DAG.getBitcast(ContainerVT, Add);
2667  return convertFromScalableVector(VT, Add, DAG, Subtarget);
2668  }
2669 
2670  // Detect shuffles which can be re-expressed as vector selects; these are
2671  // shuffles in which each element in the destination is taken from an element
2672  // at the corresponding index in either source vector.
2673  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2674  int MaskIndex = MaskIdx.value();
2675  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2676  });
2677 
2678  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2679 
2680  SmallVector<SDValue> MaskVals;
2681  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2682  // merged with a second vrgather.
2683  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2684 
2685  // By default we preserve the original operand order, and use a mask to
2686  // select LHS as true and RHS as false. However, since RVV vector selects may
2687  // feature splats but only on the LHS, we may choose to invert our mask and
2688  // instead select between RHS and LHS.
2689  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2690  bool InvertMask = IsSelect == SwapOps;
2691 
2692  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
2693  // half.
2694  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2695 
2696  // Now construct the mask that will be used by the vselect or blended
2697  // vrgather operation. For vrgathers, construct the appropriate indices into
2698  // each vector.
2699  for (int MaskIndex : Mask) {
2700  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2701  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2702  if (!IsSelect) {
2703  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2704  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2705  ? DAG.getConstant(MaskIndex, DL, XLenVT)
2706  : DAG.getUNDEF(XLenVT));
2707  GatherIndicesRHS.push_back(
2708  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2709  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2710  if (IsLHSOrUndefIndex && MaskIndex >= 0)
2711  ++LHSIndexCounts[MaskIndex];
2712  if (!IsLHSOrUndefIndex)
2713  ++RHSIndexCounts[MaskIndex - NumElts];
2714  }
2715  }
2716 
2717  if (SwapOps) {
2718  std::swap(V1, V2);
2719  std::swap(GatherIndicesLHS, GatherIndicesRHS);
2720  }
2721 
2722  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2723  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2724  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2725 
2726  if (IsSelect)
2727  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2728 
2729  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2730  // On such a large vector we're unable to use i8 as the index type.
2731  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2732  // may involve vector splitting if we're already at LMUL=8, or our
2733  // user-supplied maximum fixed-length LMUL.
2734  return SDValue();
2735  }
2736 
2737  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2738  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2739  MVT IndexVT = VT.changeTypeToInteger();
2740  // Since we can't introduce illegal index types at this stage, use i16 and
2741  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2742  // than XLenVT.
2743  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2744  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2745  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2746  }
2747 
2748  MVT IndexContainerVT =
2749  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2750 
2751  SDValue Gather;
2752  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2753  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2754  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2755  Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
2756  Subtarget);
2757  } else {
2758  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2759  // If only one index is used, we can use a "splat" vrgather.
2760  // TODO: We can splat the most-common index and fix-up any stragglers, if
2761  // that's beneficial.
2762  if (LHSIndexCounts.size() == 1) {
2763  int SplatIndex = LHSIndexCounts.begin()->getFirst();
2764  Gather =
2765  DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2766  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2767  } else {
2768  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2769  LHSIndices =
2770  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2771 
2772  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2773  TrueMask, VL);
2774  }
2775  }
2776 
2777  // If a second vector operand is used by this shuffle, blend it in with an
2778  // additional vrgather.
2779  if (!V2.isUndef()) {
2780  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2781  // If only one index is used, we can use a "splat" vrgather.
2782  // TODO: We can splat the most-common index and fix-up any stragglers, if
2783  // that's beneficial.
2784  if (RHSIndexCounts.size() == 1) {
2785  int SplatIndex = RHSIndexCounts.begin()->getFirst();
2786  V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2787  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2788  } else {
2789  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2790  RHSIndices =
2791  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2792  V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2793  VL);
2794  }
2795 
2796  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2797  SelectMask =
2798  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2799 
2800  Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2801  Gather, VL);
2802  }
2803 
2804  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2805 }
2806 
2807 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
2808  // Support splats for any type. These should type legalize well.
2809  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
2810  return true;
2811 
2812  // Only support legal VTs for other shuffles for now.
2813  if (!isTypeLegal(VT))
2814  return false;
2815 
2816  MVT SVT = VT.getSimpleVT();
2817 
2818  bool SwapSources;
2819  int LoSrc, HiSrc;
2820  return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
2821  isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
2822 }
2823 
2824 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2825 // the exponent.
2826 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2827  MVT VT = Op.getSimpleValueType();
2828  unsigned EltSize = VT.getScalarSizeInBits();
2829  SDValue Src = Op.getOperand(0);
2830  SDLoc DL(Op);
2831 
2832  // We need a FP type that can represent the value.
2833  // TODO: Use f16 for i8 when possible?
2834  MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2835  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2836 
2837  // Legal types should have been checked in the RISCVTargetLowering
2838  // constructor.
2839  // TODO: Splitting may make sense in some cases.
2840  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2841  "Expected legal float type!");
2842 
2843  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2844  // The trailing zero count is equal to log2 of this single bit value.
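 // e.g. for X = 0b01101000, X & -X = 0b00001000 = 2^3, and the exponent of
 // its FP conversion gives the 3 trailing zeros.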
2845  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2846  SDValue Neg =
2847  DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2848  Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2849  }
2850 
2851  // We have a legal FP type, convert to it.
2852  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2853  // Bitcast to integer and shift the exponent to the LSB.
2854  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2855  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2856  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2857  SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2858  DAG.getConstant(ShiftAmt, DL, IntVT));
2859  // Truncate back to original type to allow vnsrl.
2860  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2861  // The exponent contains log2 of the value in biased form.
2862  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2863 
2864  // For trailing zeros, we just need to subtract the bias.
2865  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2866  return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2867  DAG.getConstant(ExponentBias, DL, VT));
2868 
2869  // For leading zeros, we need to remove the bias and convert from log2 to
2870  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
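 // e.g. for an i32 element converted via f64 (bias 1023): the value 1 has a
 // biased exponent of 1023, so CTLZ = (1023 + 31) - 1023 = 31, while
 // 0x80000000 has a biased exponent of 1023 + 31, giving CTLZ = 0.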
2871  unsigned Adjust = ExponentBias + (EltSize - 1);
2872  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2873 }
2874 
2875 // While RVV has alignment restrictions, we should always be able to load as a
2876 // legal equivalently-sized byte-typed vector instead. This method is
2877 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2878 // the load is already correctly-aligned, it returns SDValue().
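 // For example, an underaligned nxv2i32 load is re-expressed as an nxv8i8 load
 // of the same address followed by a bitcast back to nxv2i32.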
2879 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2880  SelectionDAG &DAG) const {
2881  auto *Load = cast<LoadSDNode>(Op);
2882  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2883 
2884  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2885  Load->getMemoryVT(),
2886  *Load->getMemOperand()))
2887  return SDValue();
2888 
2889  SDLoc DL(Op);
2890  MVT VT = Op.getSimpleValueType();
2891  unsigned EltSizeBits = VT.getScalarSizeInBits();
2892  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2893  "Unexpected unaligned RVV load type");
2894  MVT NewVT =
2895  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2896  assert(NewVT.isValid() &&
2897  "Expecting equally-sized RVV vector types to be legal");
2898  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2899  Load->getPointerInfo(), Load->getOriginalAlign(),
2900  Load->getMemOperand()->getFlags());
2901  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2902 }
2903 
2904 // While RVV has alignment restrictions, we should always be able to store as a
2905 // legal equivalently-sized byte-typed vector instead. This method is
2906 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2907 // returns SDValue() if the store is already correctly aligned.
2908 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2909  SelectionDAG &DAG) const {
2910  auto *Store = cast<StoreSDNode>(Op);
2911  assert(Store && Store->getValue().getValueType().isVector() &&
2912  "Expected vector store");
2913 
2914  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2915  Store->getMemoryVT(),
2916  *Store->getMemOperand()))
2917  return SDValue();
2918 
2919  SDLoc DL(Op);
2920  SDValue StoredVal = Store->getValue();
2921  MVT VT = StoredVal.getSimpleValueType();
2922  unsigned EltSizeBits = VT.getScalarSizeInBits();
2923  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2924  "Unexpected unaligned RVV store type");
2925  MVT NewVT =
2926  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2927  assert(NewVT.isValid() &&
2928  "Expecting equally-sized RVV vector types to be legal");
2929  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2930  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2931  Store->getPointerInfo(), Store->getOriginalAlign(),
2932  Store->getMemOperand()->getFlags());
2933 }
2934 
2935 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2936  SelectionDAG &DAG) const {
2937  switch (Op.getOpcode()) {
2938  default:
2939  report_fatal_error("unimplemented operand");
2940  case ISD::GlobalAddress:
2941  return lowerGlobalAddress(Op, DAG);
2942  case ISD::BlockAddress:
2943  return lowerBlockAddress(Op, DAG);
2944  case ISD::ConstantPool:
2945  return lowerConstantPool(Op, DAG);
2946  case ISD::JumpTable:
2947  return lowerJumpTable(Op, DAG);
2948  case ISD::GlobalTLSAddress:
2949  return lowerGlobalTLSAddress(Op, DAG);
2950  case ISD::SELECT:
2951  return lowerSELECT(Op, DAG);
2952  case ISD::BRCOND:
2953  return lowerBRCOND(Op, DAG);
2954  case ISD::VASTART:
2955  return lowerVASTART(Op, DAG);
2956  case ISD::FRAMEADDR:
2957  return lowerFRAMEADDR(Op, DAG);
2958  case ISD::RETURNADDR:
2959  return lowerRETURNADDR(Op, DAG);
2960  case ISD::SHL_PARTS:
2961  return lowerShiftLeftParts(Op, DAG);
2962  case ISD::SRA_PARTS:
2963  return lowerShiftRightParts(Op, DAG, true);
2964  case ISD::SRL_PARTS:
2965  return lowerShiftRightParts(Op, DAG, false);
2966  case ISD::BITCAST: {
2967  SDLoc DL(Op);
2968  EVT VT = Op.getValueType();
2969  SDValue Op0 = Op.getOperand(0);
2970  EVT Op0VT = Op0.getValueType();
2971  MVT XLenVT = Subtarget.getXLenVT();
2972  if (VT.isFixedLengthVector()) {
2973  // We can handle fixed length vector bitcasts with a simple replacement
2974  // in isel.
2975  if (Op0VT.isFixedLengthVector())
2976  return Op;
2977  // When bitcasting from scalar to fixed-length vector, insert the scalar
2978  // into a one-element vector of the result type, and perform a vector
2979  // bitcast.
2980  if (!Op0VT.isVector()) {
2981  EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2982  if (!isTypeLegal(BVT))
2983  return SDValue();
2984  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2985  DAG.getUNDEF(BVT), Op0,
2986  DAG.getConstant(0, DL, XLenVT)));
2987  }
2988  return SDValue();
2989  }
2990  // Custom-legalize bitcasts from fixed-length vector types to scalar types
2991  // thus: bitcast the vector to a one-element vector type whose element type
2992  // is the same as the result type, and extract the first element.
2993  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2994  EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
2995  if (!isTypeLegal(BVT))
2996  return SDValue();
2997  SDValue BVec = DAG.getBitcast(BVT, Op0);
2998  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2999  DAG.getConstant(0, DL, XLenVT));
3000  }
3001  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
3002  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3003  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3004  return FPConv;
3005  }
3006  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3007  Subtarget.hasStdExtF()) {
3008  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3009  SDValue FPConv =
3010  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3011  return FPConv;
3012  }
3013  return SDValue();
3014  }
3015  case ISD::INTRINSIC_WO_CHAIN:
3016  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3017  case ISD::INTRINSIC_W_CHAIN:
3018  return LowerINTRINSIC_W_CHAIN(Op, DAG);
3019  case ISD::INTRINSIC_VOID:
3020  return LowerINTRINSIC_VOID(Op, DAG);
3021  case ISD::BSWAP:
3022  case ISD::BITREVERSE: {
3023  MVT VT = Op.getSimpleValueType();
3024  SDLoc DL(Op);
3025  if (Subtarget.hasStdExtZbp()) {
3026  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
3027  // Start with the maximum immediate value which is the bitwidth - 1.
3028  unsigned Imm = VT.getSizeInBits() - 1;
3029  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
3030  if (Op.getOpcode() == ISD::BSWAP)
3031  Imm &= ~0x7U;
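 // e.g. on RV64 this yields GREVI immediates of 63 for BITREVERSE (rev) and
 // 63 & ~7 = 56 for BSWAP (rev8).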
3032  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
3033  DAG.getConstant(Imm, DL, VT));
3034  }
3035  assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
3036  assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
3037  // Expand bitreverse to a bswap(rev8) followed by brev8.
3038  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
3039  // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
3040  // as brev8 by an isel pattern.
3041  return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
3042  DAG.getConstant(7, DL, VT));
3043  }
3044  case ISD::FSHL:
3045  case ISD::FSHR: {
3046  MVT VT = Op.getSimpleValueType();
3047  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
3048  SDLoc DL(Op);
3049  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
3050  // use log2(XLen) bits. Mask the shift amount accordingly to prevent
3051  // accidentally setting the extra bit.
3052  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
3053  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
3054  DAG.getConstant(ShAmtWidth, DL, VT));
3055  // fshl and fshr concatenate their operands in the same order. fsr and fsl
3056  // instruction use different orders. fshl will return its first operand for
3057  // shift of zero, fshr will return its second operand. fsl and fsr both
3058  // return rs1 so the ISD nodes need to have different operand orders.
3059  // Shift amount is in rs2.
3060  SDValue Op0 = Op.getOperand(0);
3061  SDValue Op1 = Op.getOperand(1);
3062  unsigned Opc = RISCVISD::FSL;
3063  if (Op.getOpcode() == ISD::FSHR) {
3064  std::swap(Op0, Op1);
3065  Opc = RISCVISD::FSR;
3066  }
3067  return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
3068  }
3069  case ISD::TRUNCATE:
3070  // Only custom-lower vector truncates
3071  if (!Op.getSimpleValueType().isVector())
3072  return Op;
3073  return lowerVectorTruncLike(Op, DAG);
3074  case ISD::ANY_EXTEND:
3075  case ISD::ZERO_EXTEND:
3076  if (Op.getOperand(0).getValueType().isVector() &&
3077  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3078  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3079  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3080  case ISD::SIGN_EXTEND:
3081  if (Op.getOperand(0).getValueType().isVector() &&
3082  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3083  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3084  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3085  case ISD::SPLAT_VECTOR_PARTS:
3086  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3087  case ISD::INSERT_VECTOR_ELT:
3088  return lowerINSERT_VECTOR_ELT(Op, DAG);
3089  case ISD::EXTRACT_VECTOR_ELT:
3090  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3091  case ISD::VSCALE: {
3092  MVT VT = Op.getSimpleValueType();
3093  SDLoc DL(Op);
3094  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3095  // We define our scalable vector types for lmul=1 to use a 64 bit known
3096  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3097  // vscale as VLENB / 8.
3098  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3099  if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
3100  report_fatal_error("Support for VLEN==32 is incomplete.");
3101  // We assume VLENB is a multiple of 8. We manually choose the best shift
3102  // here because SimplifyDemandedBits isn't always able to simplify it.
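 // e.g. vscale * 4 becomes VLENB >> 1 (4 = 2^2 and 2 < 3), while vscale * 16
 // becomes VLENB << 1.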
3103  uint64_t Val = Op.getConstantOperandVal(0);
3104  if (isPowerOf2_64(Val)) {
3105  uint64_t Log2 = Log2_64(Val);
3106  if (Log2 < 3)
3107  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3108  DAG.getConstant(3 - Log2, DL, VT));
3109  if (Log2 > 3)
3110  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3111  DAG.getConstant(Log2 - 3, DL, VT));
3112  return VLENB;
3113  }
3114  // If the multiplier is a multiple of 8, scale it down to avoid needing
3115  // to shift the VLENB value.
3116  if ((Val % 8) == 0)
3117  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3118  DAG.getConstant(Val / 8, DL, VT));
3119 
3120  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3121  DAG.getConstant(3, DL, VT));
3122  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3123  }
3124  case ISD::FPOWI: {
3125  // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3126  // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3127  if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3128  Op.getOperand(1).getValueType() == MVT::i32) {
3129  SDLoc DL(Op);
3130  SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3131  SDValue Powi =
3132  DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3133  return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3134  DAG.getIntPtrConstant(0, DL));
3135  }
3136  return SDValue();
3137  }
3138  case ISD::FP_EXTEND:
3139  case ISD::FP_ROUND:
3140  if (!Op.getValueType().isVector())
3141  return Op;
3142  return lowerVectorFPExtendOrRoundLike(Op, DAG);
3143  case ISD::FP_TO_SINT:
3144  case ISD::FP_TO_UINT:
3145  case ISD::SINT_TO_FP:
3146  case ISD::UINT_TO_FP: {
3147  // RVV can only do fp<->int conversions to types half/double the size of
3148  // the source. We custom-lower any conversions that do two hops into
3149  // sequences.
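 // e.g. i8 -> f32 is lowered as an integer extend to i32 followed by a
 // single-width convert, and f64 -> i8 as a narrowing convert to i32 followed
 // by a truncate.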
3150  MVT VT = Op.getSimpleValueType();
3151  if (!VT.isVector())
3152  return Op;
3153  SDLoc DL(Op);
3154  SDValue Src = Op.getOperand(0);
3155  MVT EltVT = VT.getVectorElementType();
3156  MVT SrcVT = Src.getSimpleValueType();
3157  MVT SrcEltVT = SrcVT.getVectorElementType();
3158  unsigned EltSize = EltVT.getSizeInBits();
3159  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3160  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3161  "Unexpected vector element types");
3162 
3163  bool IsInt2FP = SrcEltVT.isInteger();
3164  // Widening conversions
3165  if (EltSize > (2 * SrcEltSize)) {
3166  if (IsInt2FP) {
3167  // Do a regular integer sign/zero extension then convert to float.
3168  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
3169  VT.getVectorElementCount());
3170  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3171  ? ISD::ZERO_EXTEND
3172  : ISD::SIGN_EXTEND;
3173  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3174  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3175  }
3176  // FP2Int
3177  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3178  // Do one doubling fp_extend then complete the operation by converting
3179  // to int.
3180  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3181  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3182  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3183  }
3184 
3185  // Narrowing conversions
3186  if (SrcEltSize > (2 * EltSize)) {
3187  if (IsInt2FP) {
3188  // One narrowing int_to_fp, then an fp_round.
3189  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3190  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3191  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3192  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3193  }
3194  // FP2Int
3195  // One narrowing fp_to_int, then truncate the integer. If the float isn't
3196  // representable by the integer, the result is poison.
3197  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
3198  VT.getVectorElementCount());
3199  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3200  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3201  }
3202 
3203  // Scalable vectors can exit here. Patterns will handle equally-sized
3204  // conversions halving/doubling ones.
3205  if (!VT.isFixedLengthVector())
3206  return Op;
3207 
3208  // For fixed-length vectors we lower to a custom "VL" node.
3209  unsigned RVVOpc = 0;
3210  switch (Op.getOpcode()) {
3211  default:
3212  llvm_unreachable("Impossible opcode");
3213  case ISD::FP_TO_SINT:
3214  RVVOpc = RISCVISD::FP_TO_SINT_VL;
3215  break;
3216  case ISD::FP_TO_UINT:
3217  RVVOpc = RISCVISD::FP_TO_UINT_VL;
3218  break;
3219  case ISD::SINT_TO_FP:
3220  RVVOpc = RISCVISD::SINT_TO_FP_VL;
3221  break;
3222  case ISD::UINT_TO_FP:
3223  RVVOpc = RISCVISD::UINT_TO_FP_VL;
3224  break;
3225  }
3226 
3227  MVT ContainerVT, SrcContainerVT;
3228  // Derive the reference container type from the larger vector type.
3229  if (SrcEltSize > EltSize) {
3230  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3231  ContainerVT =
3232  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3233  } else {
3234  ContainerVT = getContainerForFixedLengthVector(VT);
3235  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3236  }
3237 
3238  SDValue Mask, VL;
3239  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3240 
3241  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3242  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3243  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3244  }
3245  case ISD::FP_TO_SINT_SAT:
3246  case ISD::FP_TO_UINT_SAT:
3247  return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3248  case ISD::FTRUNC:
3249  case ISD::FCEIL:
3250  case ISD::FFLOOR:
3251  return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
3252  case ISD::FROUND:
3253  return lowerFROUND(Op, DAG);
3254  case ISD::VECREDUCE_ADD:
3255  case ISD::VECREDUCE_UMAX:
3256  case ISD::VECREDUCE_SMAX:
3257  case ISD::VECREDUCE_UMIN:
3258  case ISD::VECREDUCE_SMIN:
3259  return lowerVECREDUCE(Op, DAG);
3260  case ISD::VECREDUCE_AND:
3261  case ISD::VECREDUCE_OR:
3262  case ISD::VECREDUCE_XOR:
3263  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3264  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3265  return lowerVECREDUCE(Op, DAG);
3266  case ISD::VECREDUCE_FADD:
3267  case ISD::VECREDUCE_SEQ_FADD:
3268  case ISD::VECREDUCE_FMIN:
3269  case ISD::VECREDUCE_FMAX:
3270  return lowerFPVECREDUCE(Op, DAG);
3271  case ISD::VP_REDUCE_ADD:
3272  case ISD::VP_REDUCE_UMAX:
3273  case ISD::VP_REDUCE_SMAX:
3274  case ISD::VP_REDUCE_UMIN:
3275  case ISD::VP_REDUCE_SMIN:
3276  case ISD::VP_REDUCE_FADD:
3277  case ISD::VP_REDUCE_SEQ_FADD:
3278  case ISD::VP_REDUCE_FMIN:
3279  case ISD::VP_REDUCE_FMAX:
3280  return lowerVPREDUCE(Op, DAG);
3281  case ISD::VP_REDUCE_AND:
3282  case ISD::VP_REDUCE_OR:
3283  case ISD::VP_REDUCE_XOR:
3284  if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3285  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3286  return lowerVPREDUCE(Op, DAG);
3287  case ISD::INSERT_SUBVECTOR:
3288  return lowerINSERT_SUBVECTOR(Op, DAG);
3289  case ISD::EXTRACT_SUBVECTOR:
3290  return lowerEXTRACT_SUBVECTOR(Op, DAG);
3291  case ISD::STEP_VECTOR:
3292  return lowerSTEP_VECTOR(Op, DAG);
3293  case ISD::VECTOR_REVERSE:
3294  return lowerVECTOR_REVERSE(Op, DAG);
3295  case ISD::VECTOR_SPLICE:
3296  return lowerVECTOR_SPLICE(Op, DAG);
3297  case ISD::BUILD_VECTOR:
3298  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3299  case ISD::SPLAT_VECTOR:
3300  if (Op.getValueType().getVectorElementType() == MVT::i1)
3301  return lowerVectorMaskSplat(Op, DAG);
3302  return SDValue();
3303  case ISD::VECTOR_SHUFFLE:
3304  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3305  case ISD::CONCAT_VECTORS: {
3306  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3307  // better than going through the stack, as the default expansion does.
3308  SDLoc DL(Op);
3309  MVT VT = Op.getSimpleValueType();
3310  unsigned NumOpElts =
3311  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3312  SDValue Vec = DAG.getUNDEF(VT);
3313  for (const auto &OpIdx : enumerate(Op->ops())) {
3314  SDValue SubVec = OpIdx.value();
3315  // Don't insert undef subvectors.
3316  if (SubVec.isUndef())
3317  continue;
3318  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3319  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3320  }
3321  return Vec;
3322  }
3323  case ISD::LOAD:
3324  if (auto V = expandUnalignedRVVLoad(Op, DAG))
3325  return V;
3326  if (Op.getValueType().isFixedLengthVector())
3327  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3328  return Op;
3329  case ISD::STORE:
3330  if (auto V = expandUnalignedRVVStore(Op, DAG))
3331  return V;
3332  if (Op.getOperand(1).getValueType().isFixedLengthVector())
3333  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3334  return Op;
3335  case ISD::MLOAD:
3336  case ISD::VP_LOAD:
3337  return lowerMaskedLoad(Op, DAG);
3338  case ISD::MSTORE:
3339  case ISD::VP_STORE:
3340  return lowerMaskedStore(Op, DAG);
3341  case ISD::SETCC:
3342  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3343  case ISD::ADD:
3344  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
3345  case ISD::SUB:
3346  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
3347  case ISD::MUL:
3348  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
3349  case ISD::MULHS:
3350  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
3351  case ISD::MULHU:
3352  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
3353  case ISD::AND:
3354  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3355  RISCVISD::AND_VL);
3356  case ISD::OR:
3357  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3358  RISCVISD::OR_VL);
3359  case ISD::XOR:
3360  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3361  RISCVISD::XOR_VL);
3362  case ISD::SDIV:
3363  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
3364  case ISD::SREM:
3365  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
3366  case ISD::UDIV:
3367  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
3368  case ISD::UREM:
3369  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
3370  case ISD::SHL:
3371  case ISD::SRA:
3372  case ISD::SRL:
3373  if (Op.getSimpleValueType().isFixedLengthVector())
3374  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3375  // This can be called for an i32 shift amount that needs to be promoted.
3376  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3377  "Unexpected custom legalisation");
3378  return SDValue();
3379  case ISD::SADDSAT:
3380  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
3381  case ISD::UADDSAT:
3382  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
3383  case ISD::SSUBSAT:
3384  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
3385  case ISD::USUBSAT:
3386  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
3387  case ISD::FADD:
3388  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
3389  case ISD::FSUB:
3390  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
3391  case ISD::FMUL:
3392  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
3393  case ISD::FDIV:
3394  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
3395  case ISD::FNEG:
3396  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3397  case ISD::FABS:
3398  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3399  case ISD::FSQRT:
3400  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3401  case ISD::FMA:
3402  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
3403  case ISD::SMIN:
3404  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
3405  case ISD::SMAX:
3406  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
3407  case ISD::UMIN:
3408  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
3409  case ISD::UMAX:
3410  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
3411  case ISD::FMINNUM:
3412  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
3413  case ISD::FMAXNUM:
3414  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
3415  case ISD::ABS:
3416  return lowerABS(Op, DAG);
3417  case ISD::CTLZ_ZERO_UNDEF:
3418  case ISD::CTTZ_ZERO_UNDEF:
3419  return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3420  case ISD::VSELECT:
3421  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3422  case ISD::FCOPYSIGN:
3423  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3424  case ISD::MGATHER:
3425  case ISD::VP_GATHER:
3426  return lowerMaskedGather(Op, DAG);
3427  case ISD::MSCATTER:
3428  case ISD::VP_SCATTER:
3429  return lowerMaskedScatter(Op, DAG);
3430  case ISD::FLT_ROUNDS_:
3431  return lowerGET_ROUNDING(Op, DAG);
3432  case ISD::SET_ROUNDING:
3433  return lowerSET_ROUNDING(Op, DAG);
3434  case ISD::VP_SELECT:
3435  return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3436  case ISD::VP_MERGE:
3437  return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
3438  case ISD::VP_ADD:
3439  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
3440  case ISD::VP_SUB:
3441  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
3442  case ISD::VP_MUL:
3443  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
3444  case ISD::VP_SDIV:
3445  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
3446  case ISD::VP_UDIV:
3447  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
3448  case ISD::VP_SREM:
3449  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
3450  case ISD::VP_UREM:
3451  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
3452  case ISD::VP_AND:
3453  return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3454  case ISD::VP_OR:
3455  return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3456  case ISD::VP_XOR:
3457  return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3458  case ISD::VP_ASHR:
3459  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
3460  case ISD::VP_LSHR:
3461  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
3462  case ISD::VP_SHL:
3463  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
3464  case ISD::VP_FADD:
3465  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
3466  case ISD::VP_FSUB:
3467  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
3468  case ISD::VP_FMUL:
3469  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
3470  case ISD::VP_FDIV:
3471  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
3472  case ISD::VP_FNEG:
3473  return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
3474  case ISD::VP_FMA:
3475  return lowerVPOp(Op, DAG, RISCVISD::FMA_VL);
3476  case ISD::VP_SIGN_EXTEND:
3477  case ISD::VP_ZERO_EXTEND:
3478  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3479  return lowerVPExtMaskOp(Op, DAG);
3480  return lowerVPOp(Op, DAG,
3481  Op.getOpcode() == ISD::VP_SIGN_EXTEND
3482  ? RISCVISD::VSEXT_VL
3483  : RISCVISD::VZEXT_VL);
3484  case ISD::VP_TRUNCATE:
3485  return lowerVectorTruncLike(Op, DAG);
3486  case ISD::VP_FP_EXTEND:
3487  case ISD::VP_FP_ROUND:
3488  return lowerVectorFPExtendOrRoundLike(Op, DAG);
3489  case ISD::VP_FPTOSI:
3490  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
3491  case ISD::VP_FPTOUI:
3492  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
3493  case ISD::VP_SITOFP:
3494  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
3495  case ISD::VP_UITOFP:
3496  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
3497  case ISD::VP_SETCC:
3498  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3499  return lowerVPSetCCMaskOp(Op, DAG);
3500  return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
3501  }
3502 }
3503 
3504 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3505  SelectionDAG &DAG, unsigned Flags) {
3506  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3507 }
3508 
3509 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3510  SelectionDAG &DAG, unsigned Flags) {
3511  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3512  Flags);
3513 }
3514 
3515 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3516  SelectionDAG &DAG, unsigned Flags) {
3517  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3518  N->getOffset(), Flags);
3519 }
3520 
3521 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3522  SelectionDAG &DAG, unsigned Flags) {
3523  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3524 }
3525 
3526 template <class NodeTy>
3527 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3528  bool IsLocal) const {
3529  SDLoc DL(N);
3530  EVT Ty = getPointerTy(DAG.getDataLayout());
3531 
3532  if (isPositionIndependent()) {
3533  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3534  if (IsLocal)
3535  // Use PC-relative addressing to access the symbol. This generates the
3536  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3537  // %pcrel_lo(auipc)).
3538  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3539 
3540  // Use PC-relative addressing to access the GOT for this symbol, then load
3541  // the address from the GOT. This generates the pattern (PseudoLA sym),
3542  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3543  SDValue Load =
3544  SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
3545  MachineFunction &MF = DAG.getMachineFunction();
3546  MachineMemOperand *MemOp = MF.getMachineMemOperand(
3547  MachinePointerInfo::getGOT(MF),
3548  MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3549  MachineMemOperand::MOInvariant,
3550  LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3551  DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3552  return Load;
3553  }
3554 
3555  switch (getTargetMachine().getCodeModel()) {
3556  default:
3557  report_fatal_error("Unsupported code model for lowering");
3558  case CodeModel::Small: {
3559  // Generate a sequence for accessing addresses within the first 2 GiB of
3560  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
3561  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3562  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3563  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3564  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
3565  }
3566  case CodeModel::Medium: {
3567  // Generate a sequence for accessing addresses within any 2GiB range within
3568  // the address space. This generates the pattern (PseudoLLA sym), which
3569  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3570  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3571  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3572  }
3573  }
3574 }
3575 
3576 template SDValue RISCVTargetLowering::getAddr<GlobalAddressSDNode>(
3577  GlobalAddressSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
3578 template SDValue RISCVTargetLowering::getAddr<BlockAddressSDNode>(
3579  BlockAddressSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
3580 template SDValue RISCVTargetLowering::getAddr<ConstantPoolSDNode>(
3581  ConstantPoolSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
3582 template SDValue RISCVTargetLowering::getAddr<JumpTableSDNode>(
3583  JumpTableSDNode *N, SelectionDAG &DAG, bool IsLocal) const;
3584 
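// For illustration (the symbol name sym and register a0 are placeholders):
// with the small code model the non-PIC sequence produced by getAddr is
//   lui   a0, %hi(sym)
//   addi  a0, a0, %lo(sym)
// while the PIC-local and medium-code-model paths go through PseudoLLA,
// which expands to
//   auipc a0, %pcrel_hi(sym)
//   addi  a0, a0, %pcrel_lo(<label of the auipc>)
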
3585 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3586  SelectionDAG &DAG) const {
3587  SDLoc DL(Op);
3588  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3589  assert(N->getOffset() == 0 && "unexpected offset in global node");
3590 
3591  const GlobalValue *GV = N->getGlobal();
3592  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3593  return getAddr(N, DAG, IsLocal);
3594 }
3595 
3596 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3597  SelectionDAG &DAG) const {
3598  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3599 
3600  return getAddr(N, DAG);
3601 }
3602 
3603 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3604  SelectionDAG &DAG) const {
3605  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3606 
3607  return getAddr(N, DAG);
3608 }
3609 
3610 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3611  SelectionDAG &DAG) const {
3612  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3613 
3614  return getAddr(N, DAG);
3615 }
3616 
3617 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3618  SelectionDAG &DAG,
3619  bool UseGOT) const {
3620  SDLoc DL(N);
3621  EVT Ty = getPointerTy(DAG.getDataLayout());
3622  const GlobalValue *GV = N->getGlobal();
3623  MVT XLenVT = Subtarget.getXLenVT();
3624 
3625  if (UseGOT) {
3626  // Use PC-relative addressing to access the GOT for this TLS symbol, then
3627  // load the address from the GOT and add the thread pointer. This generates
3628  // the pattern (PseudoLA_TLS_IE sym), which expands to
3629  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3630  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3631  SDValue Load =
3632  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3633  MachineFunction &MF = DAG.getMachineFunction();
3634  MachineMemOperand *MemOp = MF.getMachineMemOperand(
3635  MachinePointerInfo::getGOT(MF),
3636  MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3637  MachineMemOperand::MOInvariant,
3638  LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3639  DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3640 
3641  // Add the thread pointer.
3642  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3643  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3644  }
3645 
3646  // Generate a sequence for accessing the address relative to the thread
3647  // pointer, with the appropriate adjustment for the thread pointer offset.
3648  // This generates the pattern
3649  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3650  SDValue AddrHi =
3651  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3652  SDValue AddrAdd =
3653  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3654  SDValue AddrLo =
3655  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3656 
3657  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3658  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3659  SDValue MNAdd = SDValue(
3660  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3661  0);
3662  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3663 }
3664 
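// For illustration (the symbol name sym and register a0 are placeholders):
// the local-exec path of getStaticTLSAddr above expands to
//   lui   a0, %tprel_hi(sym)
//   add   a0, a0, tp, %tprel_add(sym)
//   addi  a0, a0, %tprel_lo(sym)
// while the initial-exec path loads the offset from the GOT via
// PseudoLA_TLS_IE and then adds the thread pointer tp.
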
3665 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3666  SelectionDAG &DAG) const {
3667  SDLoc DL(N);
3668  EVT Ty = getPointerTy(DAG.getDataLayout());
3669  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3670  const GlobalValue *GV = N->getGlobal();
3671 
3672  // Use a PC-relative addressing mode to access the global dynamic GOT address.
3673  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3674  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3675  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3676  SDValue Load =
3677  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3678 
3679  // Prepare argument list to generate call.
3680  ArgListTy Args;
3681  ArgListEntry Entry;
3682  Entry.Node = Load;
3683  Entry.Ty = CallTy;
3684  Args.push_back(Entry);
3685 
3686  // Set up the call to __tls_get_addr.
3687  TargetLowering::CallLoweringInfo CLI(DAG);
3688  CLI.setDebugLoc(DL)
3689  .setChain(DAG.getEntryNode())
3690  .setLibCallee(CallingConv::C, CallTy,
3691  DAG.getExternalSymbol("__tls_get_addr", Ty),
3692  std::move(Args));
3693 
3694  return LowerCallTo(CLI).first;
3695 }
3696 
3697 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3698  SelectionDAG &DAG) const {
3699  SDLoc DL(Op);
3700  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3701  assert(N->getOffset() == 0 && "unexpected offset in global node");
3702 
3703  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3704 
3705  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3706  CallingConv::GHC)
3707  report_fatal_error("In GHC calling convention TLS is not supported");
3708 
3709  SDValue Addr;
3710  switch (Model) {
3711  case TLSModel::LocalExec:
3712  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3713  break;
3714  case TLSModel::InitialExec:
3715  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3716  break;
3717  case TLSModel::LocalDynamic:
3718  case TLSModel::GeneralDynamic:
3719  Addr = getDynamicTLSAddr(N, DAG);
3720  break;
3721  }
3722 
3723  return Addr;
3724 }
3725 
3726 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3727  SDValue CondV = Op.getOperand(0);
3728  SDValue TrueV = Op.getOperand(1);
3729  SDValue FalseV = Op.getOperand(2);
3730  SDLoc DL(Op);
3731  MVT VT = Op.getSimpleValueType();
3732  MVT XLenVT = Subtarget.getXLenVT();
3733 
3734  // Lower vector SELECTs to VSELECTs by splatting the condition.
3735  if (VT.isVector()) {
3736  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3737  SDValue CondSplat = VT.isScalableVector()
3738  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3739  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3740  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3741  }
3742 
3743  // If the result type is XLenVT and CondV is the output of a SETCC node
3744  // which also operated on XLenVT inputs, then merge the SETCC node into the
3745  // lowered RISCVISD::SELECT_CC to take advantage of the integer
3746  // compare+branch instructions. i.e.:
3747  // (select (setcc lhs, rhs, cc), truev, falsev)
3748  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3749  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3750  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3751  SDValue LHS = CondV.getOperand(0);
3752  SDValue RHS = CondV.getOperand(1);
3753  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3754  ISD::CondCode CCVal = CC->get();
3755 
3756  // Special case for a select of 2 constants that have a difference of 1.
3757  // Normally this is done by DAGCombine, but if the select is introduced by
3758  // type legalization or op legalization, we miss it. Restricting to SETLT
3759  // case for now because that is what signed saturating add/sub need.
3760  // FIXME: We don't need the condition to be SETLT or even a SETCC,
3761  // but we would probably want to swap the true/false values if the condition
3762  // is SETGE/SETLE to avoid an XORI.
3763  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3764  CCVal == ISD::SETLT) {
3765  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3766  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3767  if (TrueVal - 1 == FalseVal)
3768  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3769  if (TrueVal + 1 == FalseVal)
3770  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3771  }
3772 
3773  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3774 
3775  SDValue TargetCC = DAG.getCondCode(CCVal);
3776  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3777  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3778  }
3779 
3780  // Otherwise:
3781  // (select condv, truev, falsev)
3782  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3783  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3784  SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3785 
3786  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3787 
3788  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3789 }
3790 
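// Illustrative, self-contained sketch of the constant fold performed in
// lowerSELECT above (the helper name and signature are hypothetical and are
// not used elsewhere): with a 0/1 condition, a select of two constants that
// differ by exactly one needs no branch at all.
static uint64_t selectConstDiffOneSketch(bool Cond, uint64_t FalseVal,
                                         bool TrueIsFalsePlusOne) {
  // (select cond, c + 1, c) == c + cond and (select cond, c - 1, c) == c - cond.
  return TrueIsFalsePlusOne ? FalseVal + Cond : FalseVal - Cond;
}
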
3791 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3792  SDValue CondV = Op.getOperand(1);
3793  SDLoc DL(Op);
3794  MVT XLenVT = Subtarget.getXLenVT();
3795 
3796  if (CondV.getOpcode() == ISD::SETCC &&
3797  CondV.getOperand(0).getValueType() == XLenVT) {
3798  SDValue LHS = CondV.getOperand(0);
3799  SDValue RHS = CondV.getOperand(1);
3800  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3801 
3802  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3803 
3804  SDValue TargetCC = DAG.getCondCode(CCVal);
3805  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3806  LHS, RHS, TargetCC, Op.getOperand(2));
3807  }
3808 
3809  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3810  CondV, DAG.getConstant(0, DL, XLenVT),
3811  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3812 }
3813 
3814 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3815  MachineFunction &MF = DAG.getMachineFunction();
3816  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3817 
3818  SDLoc DL(Op);
3819  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3820  getPointerTy(MF.getDataLayout()));
3821 
3822  // vastart just stores the address of the VarArgsFrameIndex slot into the
3823  // memory location argument.
3824  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3825  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3826  MachinePointerInfo(SV));
3827 }
3828 
3829 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3830  SelectionDAG &DAG) const {
3831  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3832  MachineFunction &MF = DAG.getMachineFunction();
3833  MachineFrameInfo &MFI = MF.getFrameInfo();
3834  MFI.setFrameAddressIsTaken(true);
3835  Register FrameReg = RI.getFrameRegister(MF);
3836  int XLenInBytes = Subtarget.getXLen() / 8;
3837 
3838  EVT VT = Op.getValueType();
3839  SDLoc DL(Op);
3840  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3841  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3842  while (Depth--) {
3843  int Offset = -(XLenInBytes * 2);
3844  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3845  DAG.getIntPtrConstant(Offset, DL));
3846  FrameAddr =
3847  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3848  }
3849  return FrameAddr;
3850 }
3851 
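// For illustration: each level of Depth in lowerFRAMEADDR above reloads the
// caller's frame pointer from the current frame at offset -2 * XLenInBytes,
// i.e. fp - 16 on RV64 and fp - 8 on RV32.
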
3852 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3853  SelectionDAG &DAG) const {
3854  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3855  MachineFunction &MF = DAG.getMachineFunction();
3856  MachineFrameInfo &MFI = MF.getFrameInfo();
3857  MFI.setReturnAddressIsTaken(true);
3858  MVT XLenVT = Subtarget.getXLenVT();
3859  int XLenInBytes = Subtarget.getXLen() / 8;
3860 
3861  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3862  return SDValue();
3863 
3864  EVT VT = Op.getValueType();
3865  SDLoc DL(Op);
3866  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3867  if (Depth) {
3868  int Off = -XLenInBytes;
3869  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3870  SDValue Offset = DAG.getConstant(Off, DL, VT);
3871  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3872  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3873  MachinePointerInfo());
3874  }
3875 
3876  // Return the value of the return address register, marking it an implicit
3877  // live-in.
3878  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3879  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3880 }
3881 
3882 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3883  SelectionDAG &DAG) const {
3884  SDLoc DL(Op);
3885  SDValue Lo = Op.getOperand(0);
3886  SDValue Hi = Op.getOperand(1);
3887  SDValue Shamt = Op.getOperand(2);
3888  EVT VT = Lo.getValueType();
3889 
3890  // if Shamt-XLEN < 0: // Shamt < XLEN
3891  // Lo = Lo << Shamt
3892  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
3893  // else:
3894  // Lo = 0
3895  // Hi = Lo << (Shamt-XLEN)
3896 
3897  SDValue Zero = DAG.getConstant(0, DL, VT);
3898  SDValue One = DAG.getConstant(1, DL, VT);
3899  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3900  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3901  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3902  SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
3903 
3904  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3905  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3906  SDValue ShiftRightLo =
3907  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3908  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3909  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3910  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3911 
3912  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3913 
3914  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3915  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3916 
3917  SDValue Parts[2] = {Lo, Hi};
3918  return DAG.getMergeValues(Parts, DL);
3919 }
3920 
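// Illustrative, self-contained sketch of the split-shift formula used in
// lowerShiftLeftParts above (a hypothetical helper, modelling RV32 with
// XLEN == 32; assumes <cstdint> is available and 0 <= Shamt < 64):
static void shiftLeftPartsSketch(uint32_t &Lo, uint32_t &Hi, unsigned Shamt) {
  const unsigned XLen = 32;
  if (Shamt < XLen) {
    // (Lo >> 1) >> ((XLen - 1) ^ Shamt) carries the high bits of Lo into Hi
    // while avoiding an undefined shift by XLen when Shamt == 0.
    Hi = (Hi << Shamt) | ((Lo >> 1) >> ((XLen - 1) ^ Shamt));
    Lo <<= Shamt;
    return;
  }
  Hi = Lo << (Shamt - XLen);
  Lo = 0;
}
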
3921 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3922  bool IsSRA) const {
3923  SDLoc DL(Op);
3924  SDValue Lo = Op.getOperand(0);
3925  SDValue Hi = Op.getOperand(1);
3926  SDValue Shamt = Op.getOperand(2);
3927  EVT VT = Lo.getValueType();
3928 
3929  // SRA expansion:
3930  // if Shamt-XLEN < 0: // Shamt < XLEN
3931  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
3932  // Hi = Hi >>s Shamt
3933  // else:
3934  // Lo = Hi >>s (Shamt-XLEN);
3935  // Hi = Hi >>s (XLEN-1)
3936  //
3937  // SRL expansion:
3938  // if Shamt-XLEN < 0: // Shamt < XLEN
3939  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
3940  // Hi = Hi >>u Shamt
3941  // else:
3942  // Lo = Hi >>u (Shamt-XLEN);
3943  // Hi = 0;
3944 
3945  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3946 
3947  SDValue Zero = DAG.getConstant(0, DL, VT);
3948  SDValue One = DAG.getConstant(1, DL, VT);
3949  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3950  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3951  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3952  SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
3953 
3954  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3955  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3956  SDValue ShiftLeftHi =
3957  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3958  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3959  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3960  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3961  SDValue HiFalse =
3962  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3963 
3964  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3965 
3966  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3967  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3968 
3969  SDValue Parts[2] = {Lo, Hi};
3970  return DAG.getMergeValues(Parts, DL);
3971 }
3972 
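// Companion sketch for the SRL/SRA expansion in lowerShiftRightParts above
// (a hypothetical helper, modelling RV32 with XLEN == 32; assumes
// 0 <= Shamt < 64):
static void shiftRightPartsSketch(uint32_t &Lo, uint32_t &Hi, unsigned Shamt,
                                  bool IsSRA) {
  const unsigned XLen = 32;
  if (Shamt < XLen) {
    // (Hi << 1) << ((XLen - 1) ^ Shamt) carries the low bits of Hi into Lo.
    Lo = (Lo >> Shamt) | ((Hi << 1) << ((XLen - 1) ^ Shamt));
    Hi = IsSRA ? (uint32_t)((int32_t)Hi >> Shamt) : Hi >> Shamt;
    return;
  }
  Lo = IsSRA ? (uint32_t)((int32_t)Hi >> (Shamt - XLen)) : Hi >> (Shamt - XLen);
  Hi = IsSRA ? (uint32_t)((int32_t)Hi >> (XLen - 1)) : 0;
}
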
3973 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3974 // legal equivalently-sized i8 type, so we can use that as a go-between.
3975 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3976  SelectionDAG &DAG) const {
3977  SDLoc DL(Op);
3978  MVT VT = Op.getSimpleValueType();
3979  SDValue SplatVal = Op.getOperand(0);
3980  // All-zeros or all-ones splats are handled specially.
3981  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3982  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3983  return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3984  }
3985  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3986  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3987  return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3988  }
3989  MVT XLenVT = Subtarget.getXLenVT();
3990  assert(SplatVal.getValueType() == XLenVT &&
3991  "Unexpected type for i1 splat value");
3992  MVT InterVT = VT.changeVectorElementType(MVT::i8);
3993  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3994  DAG.getConstant(1, DL, XLenVT));
3995  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3996  SDValue Zero = DAG.getConstant(0, DL, InterVT);
3997  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3998 }
3999 
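// For illustration of lowerVectorMaskSplat above: splatting an XLenVT scalar
// x as a vXi1 mask compares a vXi8 splat of (x & 1) against a vXi8 zero splat
// with setcc ne, so any odd x yields an all-ones mask and any even x an
// all-zeros mask; constant all-ones/all-zeros splats short-circuit to
// VMSET_VL/VMCLR_VL.
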
4000 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
4001 // illegal (currently only vXi64 RV32).
4002 // FIXME: We could also catch non-constant sign-extended i32 values and lower
4003 // them to VMV_V_X_VL.
4004 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
4005  SelectionDAG &DAG) const {
4006  SDLoc DL(Op);
4007  MVT VecVT = Op.getSimpleValueType();
4008  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
4009  "Unexpected SPLAT_VECTOR_PARTS lowering");
4010 
4011  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
4012  SDValue Lo = Op.getOperand(0);
4013  SDValue Hi = Op.getOperand(1);
4014 
4015  if (VecVT.isFixedLengthVector()) {
4016  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4017  SDLoc DL(Op);
4018  SDValue Mask, VL;
4019  std::tie(Mask, VL) =
4020  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4021 
4022  SDValue Res =
4023  splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
4024  return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
4025  }
4026 
4027  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4028  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4029  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4030  // If the Hi constant is just the sign extension of Lo, lower this as a
4031  // custom node in order to try to match RVV vector/scalar instructions.
4032  if ((LoC >> 31) == HiC)
4033  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
4034  Lo, DAG.getRegister(RISCV::X0, MVT::i32));
4035  }
4036 
4037  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4038  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4039  isa<ConstantSDNode>(Hi.getOperand(1)) &&
4040  Hi.getConstantOperandVal(1) == 31)
4041  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
4042  DAG.getRegister(RISCV::X0, MVT::i32));
4043 
4044  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
4045  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
4046  DAG.getUNDEF(VecVT), Lo, Hi,
4047  DAG.getRegister(RISCV::X0, MVT::i32));
4048 }
4049 
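// Illustrative sketch of the check driving both special cases in
// lowerSPLAT_VECTOR_PARTS above (a hypothetical helper): the i64 splat on
// RV32 can be fed from a single X register whenever the Hi word is just the
// sign extension of the Lo word.
static bool isSignExtendedPairSketch(int32_t Lo, int32_t Hi) {
  return (Lo >> 31) == Hi; // arithmetic shift yields 0 or -1, Lo's sign bit
}
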
4050 // Custom-lower extensions from mask vectors by using a vselect either with 1
4051 // for zero/any-extension or -1 for sign-extension:
4052 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
4053 // Note that any-extension is lowered identically to zero-extension.
4054 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
4055  int64_t ExtTrueVal) const {
4056  SDLoc DL(Op);
4057  MVT VecVT = Op.getSimpleValueType();
4058  SDValue Src = Op.getOperand(0);
4059  // Only custom-lower extensions from mask types
4060  assert(Src.getValueType().isVector() &&
4061  Src.getValueType().getVectorElementType() == MVT::i1);
4062 
4063  if (VecVT.isScalableVector()) {
4064  SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
4065  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
4066  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
4067  }
4068 
4069  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4070  MVT I1ContainerVT =
4071  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4072 
4073  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
4074 
4075  SDValue Mask, VL;
4076  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4077 
4078  MVT XLenVT = Subtarget.getXLenVT();
4079  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
4080  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
4081 
4082  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4083  DAG.getUNDEF(ContainerVT), SplatZero, VL);
4084  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4085  DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
4086  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
4087  SplatTrueVal, SplatZero, VL);
4088 
4089  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
4090 }
4091 
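// For illustration of lowerVectorMaskExt above: sign-extending the mask
// <i1 1, i1 0> to vXi32 yields <-1, 0>, while zero- or any-extension yields
// <1, 0>, matching the vselect between ExtTrueVal and 0.
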
4092 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
4093  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
4094  MVT ExtVT = Op.getSimpleValueType();
4095  // Only custom-lower extensions from fixed-length vector types.
4096  if (!ExtVT.isFixedLengthVector())
4097  return Op;
4098  MVT VT = Op.getOperand(0).getSimpleValueType();
4099  // Grab the canonical container type for the extended type. Infer the smaller
4100  // type from that to ensure the same number of vector elements, as we know
4101  // the LMUL will be sufficient to hold the smaller type.
4102  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
4103  // Get the extended container type manually to ensure the same number of
4104  // vector elements between source and dest.
4105  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
4106  ContainerExtVT.getVectorElementCount());
4107 
4108  SDValue Op1 =
4109  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4110 
4111  SDLoc DL(Op);
4112  SDValue Mask, VL;
4113  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4114 
4115  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
4116 
4117  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
4118 }
4119 
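// For illustration of lowerFixedLengthVectorExtendToRVV above (a hypothetical
// configuration): if the container chosen for the extended result is nxv2i32,
// a v4i16 source is placed into nxv2i16, built with MVT::getVectorVT from the
// same element count so source and destination stay in lock-step.
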
4120 // Custom-lower truncations from vectors to mask vectors by using a mask and a
4121 // setcc operation:
4122 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
4123 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
4124  SelectionDAG &DAG) const {
4125  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
4126  SDLoc DL(Op);
4127  EVT MaskVT = Op.getValueType();
4128  // Only expect to custom-lower truncations to mask types
4129  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
4130  "Unexpected type for vector mask lowering");
4131  SDValue Src = Op.getOperand(0);
4132  MVT VecVT = Src.getSimpleValueType();
4133  SDValue Mask, VL;
4134  if (IsVPTrunc) {
4135  Mask = Op.getOperand(1);
4136  VL = Op.getOperand(2);
4137  }
4138  // If this is a fixed vector, we need to convert it to a scalable vector.
4139  MVT ContainerVT = VecVT;
4140 
4141  if (VecVT.isFixedLengthVector()) {
4142  ContainerVT = getContainerForFixedLengthVector(VecVT);
4143  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4144  if (IsVPTrunc) {
4145  MVT MaskContainerVT =
4146  getContainerForFixedLengthVector(Mask.getSimpleValueType());
4147  Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
4148  }
4149  }
4150 
4151  if (!IsVPTrunc) {
4152  std::tie(Mask, VL) =
4153  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4154  }
4155 
4156  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
4157  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4158 
4159  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4160  DAG.getUNDEF(ContainerVT), SplatOne, VL);
4161  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4162  DAG.getUNDEF(ContainerVT), SplatZero, VL);
4163 
4164  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4165  SDValue Trunc =
4166  DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
4167  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
4168  DAG.getCondCode(ISD::SETNE), Mask, VL);
4169  if (MaskVT.isFixedLengthVector())
4170  Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
4171  return Trunc;
4172 }
4173 
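// For illustration of lowerVectorMaskTruncLike above: truncating the vXi8
// vector <2, 3> to vXi1 compares <2 & 1, 3 & 1> against zero with setcc ne,
// giving the mask <0, 1>; only the low bit of each source element survives.
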
4174 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
4175  SelectionDAG &DAG) const {
4176  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
4177  SDLoc DL(Op);
4178 
4179  MVT VT = Op.getSimpleValueType();
4180  // Only custom-lower vector truncates
4181  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
4182 
4183  // Truncates to mask types are handled differently
4184  if (VT.getVectorElementType() == MVT::i1)
4185  return lowerVectorMaskTruncLike(Op, DAG);
4186 
4187  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
4188  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
4189  // truncate by one power of two at a time.
4190  MVT DstEltVT = VT.getVectorElementType();
4191 
4192  SDValue Src = Op.getOperand(0);
4193  MVT SrcVT = Src.getSimpleValueType();
4194  MVT SrcEltVT = SrcVT.getVectorElementType();
4195 
4196  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
4197  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&