1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
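// Editorial note (not in the original source): with Zdinx, f64 arithmetic uses
// the integer register file, so a single GPR suffices on RV64, while RV32
// needs a GPR pair; that is why GPRPairRegClass is selected above.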
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
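// Illustrative example (editorial, assuming RISCV::RVVBitsPerBlock == 64):
// MVT::nxv1i64 is one block and lands in the plain VR class, MVT::nxv8i32 is
// 256 bits == 4 blocks and lands in VRM4 (LMUL=4), and with ELEN == 32 the
// fractional type nxv1i8 is skipped because MinElts == 64/32 == 2.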
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 if (!Subtarget.is64Bit()) {
728 // We must custom-lower certain vXi64 operations on RV32 due to the vector
729 // element type being illegal.
731 MVT::i64, Custom);
732
733 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
734
735 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
736 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
737 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
738 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
739 MVT::i64, Custom);
740 }
741
742 for (MVT VT : BoolVecVTs) {
743 if (!isTypeLegal(VT))
744 continue;
745
747
748 // Mask VTs are custom-expanded into a series of standard nodes
752 VT, Custom);
753
755 Custom);
756
759 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
760 Expand);
761
762 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
763 Custom);
764
765 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
766
769 Custom);
770
772 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
773 Custom);
774
775 // RVV has native int->float & float->int conversions where the
776 // element type sizes are within one power-of-two of each other. Any
777 // wider distances between type sizes have to be lowered as sequences
778 // which progressively narrow the gap in stages.
783 VT, Custom);
785 Custom);
786
787 // Expand all extending loads to types larger than this, and truncating
788 // stores from types larger than this.
790 setTruncStoreAction(VT, OtherVT, Expand);
792 OtherVT, Expand);
793 }
794
795 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
796 ISD::VP_TRUNCATE, ISD::VP_SETCC},
797 VT, Custom);
798
801
803
804 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
805 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
806
809 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
810 }
811
812 for (MVT VT : IntVecVTs) {
813 if (!isTypeLegal(VT))
814 continue;
815
818
819 // Vectors implement MULHS/MULHU.
821
822 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
823 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
825
827 Legal);
828
830
831 // Custom-lower extensions and truncations from/to mask types.
833 VT, Custom);
834
835 // RVV has native int->float & float->int conversions where the
836 // element type sizes are within one power-of-two of each other. Any
837 // wider distances between type sizes have to be lowered as sequences
838 // which progressively narrow the gap in stages.
843 VT, Custom);
845 Custom);
848 VT, Legal);
849
850 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
851 // nodes which truncate by one power of two at a time.
853
854 // Custom-lower insert/extract operations to simplify patterns.
856 Custom);
857
858 // Custom-lower reduction operations to set up the corresponding custom
859 // nodes' operands.
860 setOperationAction(IntegerVecReduceOps, VT, Custom);
861
862 setOperationAction(IntegerVPOps, VT, Custom);
863
865
867 VT, Custom);
868
870 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
871 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
872 VT, Custom);
873
876 VT, Custom);
877
880
882
884 setTruncStoreAction(VT, OtherVT, Expand);
886 OtherVT, Expand);
887 }
888
891
892 // Splice
894
895 if (Subtarget.hasStdExtZvkb()) {
897 setOperationAction(ISD::VP_BSWAP, VT, Custom);
898 } else {
899 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
901 }
902
903 if (Subtarget.hasStdExtZvbb()) {
905 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Custom);
909 } else {
910 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
912 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
913 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
914 VT, Expand);
915
916 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
917 // in the range of f32.
918 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
919 if (isTypeLegal(FloatVT)) {
921 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
922 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
923 VT, Custom);
924 }
925 }
926 }
927
928 // Expand various CCs to best match the RVV ISA, which natively supports UNE
929 // but no other unordered comparisons, and supports all ordered comparisons
930 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
931 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
932 // and we pattern-match those back to the "original", swapping operands once
933 // more. This way we catch both operations and both "vf" and "fv" forms with
934 // fewer patterns.
935 static const ISD::CondCode VFPCCToExpand[] = {
939 };
940
941 // TODO: support more ops.
942 static const unsigned ZvfhminPromoteOps[] = {
950
951 // TODO: support more vp ops.
952 static const unsigned ZvfhminPromoteVPOps[] = {
953 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
954 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
955 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
956 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
957 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
958 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
959 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
960 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
961 ISD::VP_FMAXIMUM};
962
963 // Sets common operation actions on RVV floating-point vector types.
964 const auto SetCommonVFPActions = [&](MVT VT) {
966 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
967 // sizes are within one power-of-two of each other. Therefore conversions
968 // between vXf16 and vXf64 must be lowered as sequences which convert via
969 // vXf32.
972 // Custom-lower insert/extract operations to simplify patterns.
974 Custom);
975 // Expand various condition codes (explained above).
976 setCondCodeAction(VFPCCToExpand, VT, Expand);
977
980
984 VT, Custom);
985
986 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
987
988 // Expand FP operations that need libcalls.
1000
1002
1004
1006 VT, Custom);
1007
1009 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1010 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1011 VT, Custom);
1012
1015
1018 VT, Custom);
1019
1022
1024
1025 setOperationAction(FloatingPointVPOps, VT, Custom);
1026
1028 Custom);
1031 VT, Legal);
1036 VT, Custom);
1037 };
1038
1039 // Sets common extload/truncstore actions on RVV floating-point vector
1040 // types.
1041 const auto SetCommonVFPExtLoadTruncStoreActions =
1042 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1043 for (auto SmallVT : SmallerVTs) {
1044 setTruncStoreAction(VT, SmallVT, Expand);
1045 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1046 }
1047 };
1048
1049 if (Subtarget.hasVInstructionsF16()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1053 SetCommonVFPActions(VT);
1054 }
1055 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1056 for (MVT VT : F16VecVTs) {
1057 if (!isTypeLegal(VT))
1058 continue;
1061 Custom);
1062 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1063 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1064 Custom);
1067 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1068 VT, Custom);
1071 VT, Custom);
1072 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1074 // load/store
1076
1077 // Custom split nxv32f16 since nxv32f32 is not legal.
1078 if (VT == MVT::nxv32f16) {
1079 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1080 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1081 continue;
1082 }
1083 // Add more promote ops.
1084 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1085 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1086 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1087 }
1088 }
1089
1090 if (Subtarget.hasVInstructionsF32()) {
1091 for (MVT VT : F32VecVTs) {
1092 if (!isTypeLegal(VT))
1093 continue;
1094 SetCommonVFPActions(VT);
1095 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1096 }
1097 }
1098
1099 if (Subtarget.hasVInstructionsF64()) {
1100 for (MVT VT : F64VecVTs) {
1101 if (!isTypeLegal(VT))
1102 continue;
1103 SetCommonVFPActions(VT);
1104 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1105 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1106 }
1107 }
1108
1109 if (Subtarget.useRVVForFixedLengthVectors()) {
1111 if (!useRVVForFixedLengthVectorVT(VT))
1112 continue;
1113
1114 // By default everything must be expanded.
1115 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1118 setTruncStoreAction(VT, OtherVT, Expand);
1120 OtherVT, Expand);
1121 }
1122
1123 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1124 // expansion to a build_vector of 0s.
1126
1127 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1129 Custom);
1130
1132 Custom);
1133
1135 VT, Custom);
1136
1138
1140
1142
1144
1146
1148
1151 Custom);
1152
1154 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1155 Custom);
1156
1158 {
1167 },
1168 VT, Custom);
1170 Custom);
1171
1173
1174 // Operations below differ between masks and other vectors.
1175 if (VT.getVectorElementType() == MVT::i1) {
1176 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1177 ISD::OR, ISD::XOR},
1178 VT, Custom);
1179
1180 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1181 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1182 VT, Custom);
1183
1184 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1185 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1186 continue;
1187 }
1188
1189 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1190 // it before type legalization for i64 vectors on RV32. It will then be
1191 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1192 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1193 // improvements first.
1194 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1197 }
1198
1201
1202 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1203 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1204 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1205 ISD::VP_SCATTER},
1206 VT, Custom);
1207
1211 VT, Custom);
1212
1215
1217
1218 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1219 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1221
1224 VT, Custom);
1225
1228
1231
1232 // Custom-lower reduction operations to set up the corresponding custom
1233 // nodes' operands.
1237 VT, Custom);
1238
1239 setOperationAction(IntegerVPOps, VT, Custom);
1240
1241 if (Subtarget.hasStdExtZvkb())
1243
1244 if (Subtarget.hasStdExtZvbb()) {
1247 VT, Custom);
1248 } else {
1249 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1250 // in the range of f32.
1251 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1252 if (isTypeLegal(FloatVT))
1255 Custom);
1256 }
1257 }
1258
1260 // There are no extending loads or truncating stores.
1261 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1262 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1263 setTruncStoreAction(VT, InnerVT, Expand);
1264 }
1265
1266 if (!useRVVForFixedLengthVectorVT(VT))
1267 continue;
1268
1269 // By default everything must be expanded.
1270 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1272
1273 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1274 // expansion to a build_vector of 0s.
1276
1277 if (VT.getVectorElementType() == MVT::f16 &&
1278 !Subtarget.hasVInstructionsF16()) {
1281 Custom);
1282 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1284 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1285 Custom);
1287 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1288 VT, Custom);
1291 VT, Custom);
1294 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1295 // Don't promote f16 vector operations to f32 if f32 vector type is
1296 // not legal.
1297 // TODO: could split the f16 vector into two vectors and do promotion.
1298 if (!isTypeLegal(F32VecVT))
1299 continue;
1300 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1301 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1302 continue;
1303 }
1304
1305 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1307 Custom);
1308
1312 VT, Custom);
1313
1316 VT, Custom);
1317
1318 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1319 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1320 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1321 ISD::VP_SCATTER},
1322 VT, Custom);
1323
1328 VT, Custom);
1329
1331
1334 VT, Custom);
1335
1336 setCondCodeAction(VFPCCToExpand, VT, Expand);
1337
1341
1343
1344 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1345
1346 setOperationAction(FloatingPointVPOps, VT, Custom);
1347
1349 Custom);
1356 VT, Custom);
1357 }
1358
1359 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1360 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1361 Custom);
1362 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1364 if (Subtarget.hasStdExtFOrZfinx())
1366 if (Subtarget.hasStdExtDOrZdinx())
1368 }
1369 }
1370
1371 if (Subtarget.hasStdExtA()) {
1373 if (RV64LegalI32 && Subtarget.is64Bit())
1375 }
1376
1377 if (Subtarget.hasForcedAtomics()) {
1378 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1384 XLenVT, LibCall);
1385 }
1386
1387 if (Subtarget.hasVendorXTHeadMemIdx()) {
1388 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1389 setIndexedLoadAction(im, MVT::i8, Legal);
1390 setIndexedStoreAction(im, MVT::i8, Legal);
1391 setIndexedLoadAction(im, MVT::i16, Legal);
1392 setIndexedStoreAction(im, MVT::i16, Legal);
1393 setIndexedLoadAction(im, MVT::i32, Legal);
1394 setIndexedStoreAction(im, MVT::i32, Legal);
1395
1396 if (Subtarget.is64Bit()) {
1397 setIndexedLoadAction(im, MVT::i64, Legal);
1398 setIndexedStoreAction(im, MVT::i64, Legal);
1399 }
1400 }
1401 }
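// Editorial note: XTHeadMemIdx provides load/store forms with address
// auto-increment (e.g. th.lwia / th.lwib, increment-after / increment-before),
// which is what backs the PRE_INC/POST_INC indexed actions marked Legal above.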
1402
1403 // Function alignments.
1404 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1405 setMinFunctionAlignment(FunctionAlignment);
1406 // Set preferred alignments.
1409
1413 if (Subtarget.is64Bit())
1415
1416 if (Subtarget.hasStdExtFOrZfinx())
1418
1419 if (Subtarget.hasStdExtZbb())
1421
1422 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1424
1425 if (Subtarget.hasStdExtZbkb())
1427 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1429 if (Subtarget.hasStdExtFOrZfinx())
1432 if (Subtarget.hasVInstructions())
1434 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1437 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1440 if (Subtarget.hasVendorXTHeadMemPair())
1442 if (Subtarget.useRVVForFixedLengthVectors())
1444
1445 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1446 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1447
1448 // Disable strict node mutation.
1449 IsStrictFPEnabled = true;
1450}
1451
1453 LLVMContext &Context,
1454 EVT VT) const {
1455 if (!VT.isVector())
1456 return getPointerTy(DL);
1457 if (Subtarget.hasVInstructions() &&
1458 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1459 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1461}
1462
1463MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1464 return Subtarget.getXLenVT();
1465}
1466
1467// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1468bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1469 unsigned VF,
1470 bool IsScalable) const {
1471 if (!Subtarget.hasVInstructions())
1472 return true;
1473
1474 if (!IsScalable)
1475 return true;
1476
1477 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1478 return true;
1479
1480 // Don't allow VF=1 if those types aren't legal.
1481 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1482 return true;
1483
1484 // VLEN=32 support is incomplete.
1485 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1486 return true;
1487
1488 // The maximum VF is for the smallest element width with LMUL=8.
1489 // VF must be a power of 2.
1490 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1491 return VF > MaxVF || !isPowerOf2_32(VF);
1492}
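// Worked example (editorial, assuming RISCV::RVVBitsPerBlock == 64): MaxVF is
// (64 / 8) * 8 == 64, i.e. LMUL=8 worth of the narrowest (i8) elements at the
// minimum VLEN, so only scalable, power-of-two VFs up to 64 are lowered to a
// vsetvli-based get_vector_length.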
1493
1494bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1495 return !Subtarget.hasVInstructions() ||
1496 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1497}
1498
1500 const CallInst &I,
1501 MachineFunction &MF,
1502 unsigned Intrinsic) const {
1503 auto &DL = I.getModule()->getDataLayout();
1504
1505 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1506 bool IsUnitStrided, bool UsePtrVal = false) {
1508 // We can't use ptrVal if the intrinsic can access memory before the
1509 // pointer. This means we can't use it for strided or indexed intrinsics.
1510 if (UsePtrVal)
1511 Info.ptrVal = I.getArgOperand(PtrOp);
1512 else
1513 Info.fallbackAddressSpace =
1514 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1515 Type *MemTy;
1516 if (IsStore) {
1517 // Store value is the first operand.
1518 MemTy = I.getArgOperand(0)->getType();
1519 } else {
1520 // Use return type. If it's segment load, return type is a struct.
1521 MemTy = I.getType();
1522 if (MemTy->isStructTy())
1523 MemTy = MemTy->getStructElementType(0);
1524 }
1525 if (!IsUnitStrided)
1526 MemTy = MemTy->getScalarType();
1527
1528 Info.memVT = getValueType(DL, MemTy);
1529 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1531 Info.flags |=
1533 return true;
1534 };
1535
1536 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1538
1540 switch (Intrinsic) {
1541 default:
1542 return false;
1543 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1545 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1546 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1547 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1548 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1549 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1550 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1551 case Intrinsic::riscv_masked_cmpxchg_i32:
1553 Info.memVT = MVT::i32;
1554 Info.ptrVal = I.getArgOperand(0);
1555 Info.offset = 0;
1556 Info.align = Align(4);
1559 return true;
1560 case Intrinsic::riscv_masked_strided_load:
1561 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1562 /*IsUnitStrided*/ false);
1563 case Intrinsic::riscv_masked_strided_store:
1564 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1565 /*IsUnitStrided*/ false);
1566 case Intrinsic::riscv_seg2_load:
1567 case Intrinsic::riscv_seg3_load:
1568 case Intrinsic::riscv_seg4_load:
1569 case Intrinsic::riscv_seg5_load:
1570 case Intrinsic::riscv_seg6_load:
1571 case Intrinsic::riscv_seg7_load:
1572 case Intrinsic::riscv_seg8_load:
1573 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1574 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1575 case Intrinsic::riscv_seg2_store:
1576 case Intrinsic::riscv_seg3_store:
1577 case Intrinsic::riscv_seg4_store:
1578 case Intrinsic::riscv_seg5_store:
1579 case Intrinsic::riscv_seg6_store:
1580 case Intrinsic::riscv_seg7_store:
1581 case Intrinsic::riscv_seg8_store:
1582 // Operands are (vec, ..., vec, ptr, vl)
1583 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1584 /*IsStore*/ true,
1585 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1586 case Intrinsic::riscv_vle:
1587 case Intrinsic::riscv_vle_mask:
1588 case Intrinsic::riscv_vleff:
1589 case Intrinsic::riscv_vleff_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ false,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vse:
1595 case Intrinsic::riscv_vse_mask:
1596 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1597 /*IsStore*/ true,
1598 /*IsUnitStrided*/ true,
1599 /*UsePtrVal*/ true);
1600 case Intrinsic::riscv_vlse:
1601 case Intrinsic::riscv_vlse_mask:
1602 case Intrinsic::riscv_vloxei:
1603 case Intrinsic::riscv_vloxei_mask:
1604 case Intrinsic::riscv_vluxei:
1605 case Intrinsic::riscv_vluxei_mask:
1606 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1607 /*IsStore*/ false,
1608 /*IsUnitStrided*/ false);
1609 case Intrinsic::riscv_vsse:
1610 case Intrinsic::riscv_vsse_mask:
1611 case Intrinsic::riscv_vsoxei:
1612 case Intrinsic::riscv_vsoxei_mask:
1613 case Intrinsic::riscv_vsuxei:
1614 case Intrinsic::riscv_vsuxei_mask:
1615 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1616 /*IsStore*/ true,
1617 /*IsUnitStrided*/ false);
1618 case Intrinsic::riscv_vlseg2:
1619 case Intrinsic::riscv_vlseg3:
1620 case Intrinsic::riscv_vlseg4:
1621 case Intrinsic::riscv_vlseg5:
1622 case Intrinsic::riscv_vlseg6:
1623 case Intrinsic::riscv_vlseg7:
1624 case Intrinsic::riscv_vlseg8:
1625 case Intrinsic::riscv_vlseg2ff:
1626 case Intrinsic::riscv_vlseg3ff:
1627 case Intrinsic::riscv_vlseg4ff:
1628 case Intrinsic::riscv_vlseg5ff:
1629 case Intrinsic::riscv_vlseg6ff:
1630 case Intrinsic::riscv_vlseg7ff:
1631 case Intrinsic::riscv_vlseg8ff:
1632 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1633 /*IsStore*/ false,
1634 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1635 case Intrinsic::riscv_vlseg2_mask:
1636 case Intrinsic::riscv_vlseg3_mask:
1637 case Intrinsic::riscv_vlseg4_mask:
1638 case Intrinsic::riscv_vlseg5_mask:
1639 case Intrinsic::riscv_vlseg6_mask:
1640 case Intrinsic::riscv_vlseg7_mask:
1641 case Intrinsic::riscv_vlseg8_mask:
1642 case Intrinsic::riscv_vlseg2ff_mask:
1643 case Intrinsic::riscv_vlseg3ff_mask:
1644 case Intrinsic::riscv_vlseg4ff_mask:
1645 case Intrinsic::riscv_vlseg5ff_mask:
1646 case Intrinsic::riscv_vlseg6ff_mask:
1647 case Intrinsic::riscv_vlseg7ff_mask:
1648 case Intrinsic::riscv_vlseg8ff_mask:
1649 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1650 /*IsStore*/ false,
1651 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1652 case Intrinsic::riscv_vlsseg2:
1653 case Intrinsic::riscv_vlsseg3:
1654 case Intrinsic::riscv_vlsseg4:
1655 case Intrinsic::riscv_vlsseg5:
1656 case Intrinsic::riscv_vlsseg6:
1657 case Intrinsic::riscv_vlsseg7:
1658 case Intrinsic::riscv_vlsseg8:
1659 case Intrinsic::riscv_vloxseg2:
1660 case Intrinsic::riscv_vloxseg3:
1661 case Intrinsic::riscv_vloxseg4:
1662 case Intrinsic::riscv_vloxseg5:
1663 case Intrinsic::riscv_vloxseg6:
1664 case Intrinsic::riscv_vloxseg7:
1665 case Intrinsic::riscv_vloxseg8:
1666 case Intrinsic::riscv_vluxseg2:
1667 case Intrinsic::riscv_vluxseg3:
1668 case Intrinsic::riscv_vluxseg4:
1669 case Intrinsic::riscv_vluxseg5:
1670 case Intrinsic::riscv_vluxseg6:
1671 case Intrinsic::riscv_vluxseg7:
1672 case Intrinsic::riscv_vluxseg8:
1673 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1674 /*IsStore*/ false,
1675 /*IsUnitStrided*/ false);
1676 case Intrinsic::riscv_vlsseg2_mask:
1677 case Intrinsic::riscv_vlsseg3_mask:
1678 case Intrinsic::riscv_vlsseg4_mask:
1679 case Intrinsic::riscv_vlsseg5_mask:
1680 case Intrinsic::riscv_vlsseg6_mask:
1681 case Intrinsic::riscv_vlsseg7_mask:
1682 case Intrinsic::riscv_vlsseg8_mask:
1683 case Intrinsic::riscv_vloxseg2_mask:
1684 case Intrinsic::riscv_vloxseg3_mask:
1685 case Intrinsic::riscv_vloxseg4_mask:
1686 case Intrinsic::riscv_vloxseg5_mask:
1687 case Intrinsic::riscv_vloxseg6_mask:
1688 case Intrinsic::riscv_vloxseg7_mask:
1689 case Intrinsic::riscv_vloxseg8_mask:
1690 case Intrinsic::riscv_vluxseg2_mask:
1691 case Intrinsic::riscv_vluxseg3_mask:
1692 case Intrinsic::riscv_vluxseg4_mask:
1693 case Intrinsic::riscv_vluxseg5_mask:
1694 case Intrinsic::riscv_vluxseg6_mask:
1695 case Intrinsic::riscv_vluxseg7_mask:
1696 case Intrinsic::riscv_vluxseg8_mask:
1697 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1698 /*IsStore*/ false,
1699 /*IsUnitStrided*/ false);
1700 case Intrinsic::riscv_vsseg2:
1701 case Intrinsic::riscv_vsseg3:
1702 case Intrinsic::riscv_vsseg4:
1703 case Intrinsic::riscv_vsseg5:
1704 case Intrinsic::riscv_vsseg6:
1705 case Intrinsic::riscv_vsseg7:
1706 case Intrinsic::riscv_vsseg8:
1707 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1708 /*IsStore*/ true,
1709 /*IsUnitStrided*/ false);
1710 case Intrinsic::riscv_vsseg2_mask:
1711 case Intrinsic::riscv_vsseg3_mask:
1712 case Intrinsic::riscv_vsseg4_mask:
1713 case Intrinsic::riscv_vsseg5_mask:
1714 case Intrinsic::riscv_vsseg6_mask:
1715 case Intrinsic::riscv_vsseg7_mask:
1716 case Intrinsic::riscv_vsseg8_mask:
1717 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1718 /*IsStore*/ true,
1719 /*IsUnitStrided*/ false);
1720 case Intrinsic::riscv_vssseg2:
1721 case Intrinsic::riscv_vssseg3:
1722 case Intrinsic::riscv_vssseg4:
1723 case Intrinsic::riscv_vssseg5:
1724 case Intrinsic::riscv_vssseg6:
1725 case Intrinsic::riscv_vssseg7:
1726 case Intrinsic::riscv_vssseg8:
1727 case Intrinsic::riscv_vsoxseg2:
1728 case Intrinsic::riscv_vsoxseg3:
1729 case Intrinsic::riscv_vsoxseg4:
1730 case Intrinsic::riscv_vsoxseg5:
1731 case Intrinsic::riscv_vsoxseg6:
1732 case Intrinsic::riscv_vsoxseg7:
1733 case Intrinsic::riscv_vsoxseg8:
1734 case Intrinsic::riscv_vsuxseg2:
1735 case Intrinsic::riscv_vsuxseg3:
1736 case Intrinsic::riscv_vsuxseg4:
1737 case Intrinsic::riscv_vsuxseg5:
1738 case Intrinsic::riscv_vsuxseg6:
1739 case Intrinsic::riscv_vsuxseg7:
1740 case Intrinsic::riscv_vsuxseg8:
1741 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1742 /*IsStore*/ true,
1743 /*IsUnitStrided*/ false);
1744 case Intrinsic::riscv_vssseg2_mask:
1745 case Intrinsic::riscv_vssseg3_mask:
1746 case Intrinsic::riscv_vssseg4_mask:
1747 case Intrinsic::riscv_vssseg5_mask:
1748 case Intrinsic::riscv_vssseg6_mask:
1749 case Intrinsic::riscv_vssseg7_mask:
1750 case Intrinsic::riscv_vssseg8_mask:
1751 case Intrinsic::riscv_vsoxseg2_mask:
1752 case Intrinsic::riscv_vsoxseg3_mask:
1753 case Intrinsic::riscv_vsoxseg4_mask:
1754 case Intrinsic::riscv_vsoxseg5_mask:
1755 case Intrinsic::riscv_vsoxseg6_mask:
1756 case Intrinsic::riscv_vsoxseg7_mask:
1757 case Intrinsic::riscv_vsoxseg8_mask:
1758 case Intrinsic::riscv_vsuxseg2_mask:
1759 case Intrinsic::riscv_vsuxseg3_mask:
1760 case Intrinsic::riscv_vsuxseg4_mask:
1761 case Intrinsic::riscv_vsuxseg5_mask:
1762 case Intrinsic::riscv_vsuxseg6_mask:
1763 case Intrinsic::riscv_vsuxseg7_mask:
1764 case Intrinsic::riscv_vsuxseg8_mask:
1765 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1766 /*IsStore*/ true,
1767 /*IsUnitStrided*/ false);
1768 }
1769}
1770
1772 const AddrMode &AM, Type *Ty,
1773 unsigned AS,
1774 Instruction *I) const {
1775 // No global is ever allowed as a base.
1776 if (AM.BaseGV)
1777 return false;
1778
1779 // RVV instructions only support register addressing.
1780 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1781 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1782
1783 // Require a 12-bit signed offset.
1784 if (!isInt<12>(AM.BaseOffs))
1785 return false;
1786
1787 switch (AM.Scale) {
1788 case 0: // "r+i" or just "i", depending on HasBaseReg.
1789 break;
1790 case 1:
1791 if (!AM.HasBaseReg) // allow "r+i".
1792 break;
1793 return false; // disallow "r+r" or "r+r+i".
1794 default:
1795 return false;
1796 }
1797
1798 return true;
1799}
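// Editorial example: "reg + 16" (as in lw a0, 16(a1)) is legal because the
// offset fits the signed 12-bit immediate and Scale is 0; "reg + reg" or any
// scaled index is rejected, and RVV memory accesses accept a bare register
// address only.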
1800
1802 return isInt<12>(Imm);
1803}
1804
1806 return isInt<12>(Imm);
1807}
1808
1809// On RV32, 64-bit integers are split into their high and low parts and held
1810// in two different registers, so the trunc is free since the low register can
1811// just be used.
1812// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1813// isTruncateFree?
1815 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1816 return false;
1817 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1818 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1819 return (SrcBits == 64 && DestBits == 32);
1820}
1821
1823 // We consider i64->i32 free on RV64 since we have good selection of W
1824 // instructions that make promoting operations back to i64 free in many cases.
1825 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1826 !DstVT.isInteger())
1827 return false;
1828 unsigned SrcBits = SrcVT.getSizeInBits();
1829 unsigned DestBits = DstVT.getSizeInBits();
1830 return (SrcBits == 64 && DestBits == 32);
1831}
1832
1834 // Zexts are free if they can be combined with a load.
1835 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1836 // poorly with type legalization of compares preferring sext.
1837 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1838 EVT MemVT = LD->getMemoryVT();
1839 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1840 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1841 LD->getExtensionType() == ISD::ZEXTLOAD))
1842 return true;
1843 }
1844
1845 return TargetLowering::isZExtFree(Val, VT2);
1846}
1847
1849 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1850}
1851
1853 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1854}
1855
1857 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1858}
1859
1861 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1862 Subtarget.hasVendorXCVbitmanip();
1863}
1864
1866 const Instruction &AndI) const {
1867 // We expect to be able to match a bit extraction instruction if the Zbs
1868 // extension is supported and the mask is a power of two. However, we
1869 // conservatively return false if the mask would fit in an ANDI instruction,
1870 // on the basis that it's possible the sinking+duplication of the AND in
1871 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1872 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1873 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1874 return false;
1875 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1876 if (!Mask)
1877 return false;
1878 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1879}
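// Editorial example: with Zbs, a mask of 0x400 (bit 10) still fits an ANDI
// immediate, so this returns false; a mask of 0x800 (bit 11, value 2048) no
// longer fits a signed 12-bit immediate and is a power of two, so the AND is
// kept for a BEXTI+BNEZ style match.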
1880
1882 EVT VT = Y.getValueType();
1883
1884 // FIXME: Support vectors once we have tests.
1885 if (VT.isVector())
1886 return false;
1887
1888 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1889 !isa<ConstantSDNode>(Y);
1890}
1891
1893 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1894 if (Subtarget.hasStdExtZbs())
1895 return X.getValueType().isScalarInteger();
1896 auto *C = dyn_cast<ConstantSDNode>(Y);
1897 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1898 if (Subtarget.hasVendorXTHeadBs())
1899 return C != nullptr;
1900 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1901 return C && C->getAPIntValue().ule(10);
1902}
1903
1905 EVT VT) const {
1906 // Only enable for rvv.
1907 if (!VT.isVector() || !Subtarget.hasVInstructions())
1908 return false;
1909
1910 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1911 return false;
1912
1913 return true;
1914}
1915
1917 Type *Ty) const {
1918 assert(Ty->isIntegerTy());
1919
1920 unsigned BitSize = Ty->getIntegerBitWidth();
1921 if (BitSize > Subtarget.getXLen())
1922 return false;
1923
1924 // Fast path, assume 32-bit immediates are cheap.
1925 int64_t Val = Imm.getSExtValue();
1926 if (isInt<32>(Val))
1927 return true;
1928
1929 // A constant pool entry may be more aligned than the load we're trying to
1930 // replace. If we don't support unaligned scalar mem, prefer the constant
1931 // pool.
1932 // TODO: Can the caller pass down the alignment?
1933 if (!Subtarget.enableUnalignedScalarMem())
1934 return true;
1935
1936 // Prefer to keep the load if it would require many instructions.
1937 // This uses the same threshold we use for constant pools but doesn't
1938 // check useConstantPoolForLargeInts.
1939 // TODO: Should we keep the load only when we're definitely going to emit a
1940 // constant pool?
1941
1943 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1944}
1945
1949 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1950 SelectionDAG &DAG) const {
1951 // One interesting pattern that we'd want to form is 'bit extract':
1952 // ((1 >> Y) & 1) ==/!= 0
1953 // But we also need to be careful not to try to reverse that fold.
1954
1955 // Is this '((1 >> Y) & 1)'?
1956 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1957 return false; // Keep the 'bit extract' pattern.
1958
1959 // Will this be '((1 >> Y) & 1)' after the transform?
1960 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1961 return true; // Do form the 'bit extract' pattern.
1962
1963 // If 'X' is a constant, and we transform, then we will immediately
1964 // try to undo the fold, thus causing endless combine loop.
1965 // So only do the transform if X is not a constant. This matches the default
1966 // implementation of this function.
1967 return !XC;
1968}
1969
1970bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1971 switch (Opcode) {
1972 case Instruction::Add:
1973 case Instruction::Sub:
1974 case Instruction::Mul:
1975 case Instruction::And:
1976 case Instruction::Or:
1977 case Instruction::Xor:
1978 case Instruction::FAdd:
1979 case Instruction::FSub:
1980 case Instruction::FMul:
1981 case Instruction::FDiv:
1982 case Instruction::ICmp:
1983 case Instruction::FCmp:
1984 return true;
1985 case Instruction::Shl:
1986 case Instruction::LShr:
1987 case Instruction::AShr:
1988 case Instruction::UDiv:
1989 case Instruction::SDiv:
1990 case Instruction::URem:
1991 case Instruction::SRem:
1992 return Operand == 1;
1993 default:
1994 return false;
1995 }
1996}
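// Editorial note: this mirrors the available RVV forms. Commutative ops and
// compares can put either operand in the scalar slot (vadd.vx and friends,
// with vrsub.vx covering reversed subtraction), while shifts and integer
// division/remainder only take a scalar as the second source, e.g.
// vsll.vx vd, vs2, rs1, hence the "Operand == 1" cases.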
1997
1998
2000 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2001 return false;
2002
2003 if (canSplatOperand(I->getOpcode(), Operand))
2004 return true;
2005
2006 auto *II = dyn_cast<IntrinsicInst>(I);
2007 if (!II)
2008 return false;
2009
2010 switch (II->getIntrinsicID()) {
2011 case Intrinsic::fma:
2012 case Intrinsic::vp_fma:
2013 return Operand == 0 || Operand == 1;
2014 case Intrinsic::vp_shl:
2015 case Intrinsic::vp_lshr:
2016 case Intrinsic::vp_ashr:
2017 case Intrinsic::vp_udiv:
2018 case Intrinsic::vp_sdiv:
2019 case Intrinsic::vp_urem:
2020 case Intrinsic::vp_srem:
2021 case Intrinsic::ssub_sat:
2022 case Intrinsic::vp_ssub_sat:
2023 case Intrinsic::usub_sat:
2024 case Intrinsic::vp_usub_sat:
2025 return Operand == 1;
2026 // These intrinsics are commutative.
2027 case Intrinsic::vp_add:
2028 case Intrinsic::vp_mul:
2029 case Intrinsic::vp_and:
2030 case Intrinsic::vp_or:
2031 case Intrinsic::vp_xor:
2032 case Intrinsic::vp_fadd:
2033 case Intrinsic::vp_fmul:
2034 case Intrinsic::vp_icmp:
2035 case Intrinsic::vp_fcmp:
2036 case Intrinsic::smin:
2037 case Intrinsic::vp_smin:
2038 case Intrinsic::umin:
2039 case Intrinsic::vp_umin:
2040 case Intrinsic::smax:
2041 case Intrinsic::vp_smax:
2042 case Intrinsic::umax:
2043 case Intrinsic::vp_umax:
2044 case Intrinsic::sadd_sat:
2045 case Intrinsic::vp_sadd_sat:
2046 case Intrinsic::uadd_sat:
2047 case Intrinsic::vp_uadd_sat:
2048 // These intrinsics have 'vr' versions.
2049 case Intrinsic::vp_sub:
2050 case Intrinsic::vp_fsub:
2051 case Intrinsic::vp_fdiv:
2052 return Operand == 0 || Operand == 1;
2053 default:
2054 return false;
2055 }
2056}
2057
2058/// Check if sinking \p I's operands to I's basic block is profitable, because
2059/// the operands can be folded into a target instruction, e.g.
2060/// splats of scalars can fold into vector instructions.
2062 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2063 using namespace llvm::PatternMatch;
2064
2065 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2066 return false;
2067
2068 // Only sink splat operands if the target prefers it. Some targets require
2069 // S2V transfer buffers and we can run out of them copying the same value
2070 // repeatedly.
2071 // FIXME: It could still be worth doing if it would improve vector register
2072 // pressure and prevent a vector spill.
2073 if (!Subtarget.sinkSplatOperands())
2074 return false;
2075
2076 for (auto OpIdx : enumerate(I->operands())) {
2077 if (!canSplatOperand(I, OpIdx.index()))
2078 continue;
2079
2080 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2081 // Make sure we are not already sinking this operand
2082 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2083 continue;
2084
2085 // We are looking for a splat that can be sunk.
2087 m_Undef(), m_ZeroMask())))
2088 continue;
2089
2090 // Don't sink i1 splats.
2091 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2092 continue;
2093
2094 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2095 // and vector registers.
2096 for (Use &U : Op->uses()) {
2097 Instruction *Insn = cast<Instruction>(U.getUser());
2098 if (!canSplatOperand(Insn, U.getOperandNo()))
2099 return false;
2100 }
2101
2102 Ops.push_back(&Op->getOperandUse(0));
2103 Ops.push_back(&OpIdx.value());
2104 }
2105 return true;
2106}
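// Editorial sketch of the splat shape matched above, in IR terms:
//   %ins   = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison,
//            <vscale x 4 x i32> zeroinitializer
// Sinking %ins/%splat next to each user lets isel fold %x straight into a .vx
// instruction form instead of keeping the splat live in a vector register.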
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END)
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
2123 // Permit a vector binary operation to be converted to a scalar binary
2124 // operation which is custom lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
2139// Return one of the followings:
2140// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2141// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2142// positive counterpart, which will be materialized from the first returned
2143 // element. The second returned element indicates that an FNEG should
2144 // follow.
2145// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2146std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2147 EVT VT) const {
2148 if (!Subtarget.hasStdExtZfa())
2149 return std::make_pair(-1, false);
2150
2151 bool IsSupportedVT = false;
2152 if (VT == MVT::f16) {
2153 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2154 } else if (VT == MVT::f32) {
2155 IsSupportedVT = true;
2156 } else if (VT == MVT::f64) {
2157 assert(Subtarget.hasStdExtD() && "Expect D extension");
2158 IsSupportedVT = true;
2159 }
2160
2161 if (!IsSupportedVT)
2162 return std::make_pair(-1, false);
2163
2165 if (Index < 0 && Imm.isNegative())
2166 // Try the combination of its positive counterpart + FNEG.
2167 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2168 else
2169 return std::make_pair(Index, false);
2170}
2171
2173 bool ForCodeSize) const {
2174 bool IsLegalVT = false;
2175 if (VT == MVT::f16)
2176 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2177 else if (VT == MVT::f32)
2178 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2179 else if (VT == MVT::f64)
2180 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2181 else if (VT == MVT::bf16)
2182 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2183
2184 if (!IsLegalVT)
2185 return false;
2186
2187 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2188 return true;
2189
2190 // Cannot create a 64 bit floating-point immediate value for rv32.
2191 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2192 // td can handle +0.0 or -0.0 already.
2193 // -0.0 can be created by fmv + fneg.
2194 return Imm.isZero();
2195 }
2196
2197 // Special case: fmv + fneg
2198 if (Imm.isNegZero())
2199 return true;
2200
2201 // Building an integer and then converting requires a fmv at the end of
2202 // the integer sequence.
2203 const int Cost =
2204 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2205 Subtarget);
2206 return Cost <= FPImmCost;
2207}
2208
2209// TODO: This is very conservative.
2210bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2211 unsigned Index) const {
2212 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2213 return false;
2214
2215 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2216 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2217 return false;
2218
2219 EVT EltVT = ResVT.getVectorElementType();
2220 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2221
2222 // The smallest type we can slide is i8.
2223 // TODO: We can extract index 0 from a mask vector without a slide.
2224 if (EltVT == MVT::i1)
2225 return false;
2226
2227 unsigned ResElts = ResVT.getVectorNumElements();
2228 unsigned SrcElts = SrcVT.getVectorNumElements();
2229
2230 unsigned MinVLen = Subtarget.getRealMinVLen();
2231 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2232
2233 // If we're extracting only data from the first VLEN bits of the source
2234 // then we can always do this with an m1 vslidedown.vx. Restricting the
2235 // Index ensures we can use a vslidedown.vi.
2236 // TODO: We can generalize this when the exact VLEN is known.
2237 if (Index + ResElts <= MinVLMAX && Index < 31)
2238 return true;
2239
2240 // Conservatively only handle extracting half of a vector.
2241 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2242 // a cheap extract. However, this case is important in practice for
2243 // shuffled extracts of longer vectors. How should we resolve this?
2244 if ((ResElts * 2) != SrcElts)
2245 return false;
2246
2247 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2248 // cheap.
2249 if (Index >= 32)
2250 return false;
2251
2252 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2253 // the upper half of a vector until we have more test coverage.
2254 return Index == 0 || Index == ResElts;
2255}
2256
2257MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2258 CallingConv::ID CC,
2259 EVT VT) const {
2260 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2261 // We might still end up using a GPR but that will be decided based on ABI.
2262 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2263 !Subtarget.hasStdExtZfhminOrZhinxmin())
2264 return MVT::f32;
2265
2266 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2267
2268 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2269 return MVT::i64;
2270
2271 return PartVT;
2272}
2273
2274unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2275 CallingConv::ID CC,
2276 EVT VT) const {
2277 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2278 // We might still end up using a GPR but that will be decided based on ABI.
2279 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2280 !Subtarget.hasStdExtZfhminOrZhinxmin())
2281 return 1;
2282
2283 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2284}
2285
2286unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2287 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2288 unsigned &NumIntermediates, MVT &RegisterVT) const {
2289 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2290 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2291
2292 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2293 IntermediateVT = MVT::i64;
2294
2295 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2296 RegisterVT = MVT::i64;
2297
2298 return NumRegs;
2299}
2300
2301// Changes the condition code and swaps operands if necessary, so the SetCC
2302// operation matches one of the comparisons supported directly by branches
2303// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2304// with 1/-1.
2305static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2306 ISD::CondCode &CC, SelectionDAG &DAG) {
2307 // If this is a single bit test that can't be handled by ANDI, shift the
2308 // bit to be tested to the MSB and perform a signed compare with 0.
2309 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2310 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2311 isa<ConstantSDNode>(LHS.getOperand(1))) {
2312 uint64_t Mask = LHS.getConstantOperandVal(1);
2313 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2314 unsigned ShAmt = 0;
2315 if (isPowerOf2_64(Mask)) {
2316 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2317 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2318 } else {
2319 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2320 }
2321
2322 LHS = LHS.getOperand(0);
2323 if (ShAmt != 0)
2324 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2325 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2326 return;
2327 }
2328 }
2329
2330 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2331 int64_t C = RHSC->getSExtValue();
2332 switch (CC) {
2333 default: break;
2334 case ISD::SETGT:
2335 // Convert X > -1 to X >= 0.
2336 if (C == -1) {
2337 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2338 CC = ISD::SETGE;
2339 return;
2340 }
2341 break;
2342 case ISD::SETLT:
2343 // Convert X < 1 to 0 >= X.
2344 if (C == 1) {
2345 RHS = LHS;
2346 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2347 CC = ISD::SETGE;
2348 return;
2349 }
2350 break;
2351 }
2352 }
2353
2354 switch (CC) {
2355 default:
2356 break;
2357 case ISD::SETGT:
2358 case ISD::SETLE:
2359 case ISD::SETUGT:
2360 case ISD::SETULE:
2361 CC = ISD::getSetCCSwappedOperands(CC);
2362 std::swap(LHS, RHS);
2363 break;
2364 }
2365}
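// Illustrative walk-through of the single-bit-test path above (RV64, values
// chosen for exposition): for `(X & (1 << 40)) == 0`, the mask cannot be
// encoded in an ANDI immediate, so X is shifted left by 23 to move bit 40
// into the sign bit and the equality test is rewritten as a signed compare
// with 0, which maps directly onto a branch against x0.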
2366
2367RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2368 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2369 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2370 if (VT.getVectorElementType() == MVT::i1)
2371 KnownSize *= 8;
2372
2373 switch (KnownSize) {
2374 default:
2375 llvm_unreachable("Invalid LMUL.");
2376 case 8:
2377 return RISCVII::VLMUL::LMUL_F8;
2378 case 16:
2379 return RISCVII::VLMUL::LMUL_F4;
2380 case 32:
2381 return RISCVII::VLMUL::LMUL_F2;
2382 case 64:
2383 return RISCVII::VLMUL::LMUL_1;
2384 case 128:
2385 return RISCVII::VLMUL::LMUL_2;
2386 case 256:
2387 return RISCVII::VLMUL::LMUL_4;
2388 case 512:
2389 return RISCVII::VLMUL::LMUL_8;
2390 }
2391}
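// Rough example of the mapping above: nxv1i8 has a known minimum size of 8
// bits and maps to LMUL_F8, nxv2i32 (64 bits) maps to LMUL_1, and nxv8i32
// (256 bits) maps to LMUL_4. i1 vectors are scaled by 8 first, so nxv8i1 is
// treated as a 64-bit payload and also maps to LMUL_1.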
2392
2393unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2394 switch (LMul) {
2395 default:
2396 llvm_unreachable("Invalid LMUL.");
2397 case RISCVII::VLMUL::LMUL_F8:
2398 case RISCVII::VLMUL::LMUL_F4:
2399 case RISCVII::VLMUL::LMUL_F2:
2400 case RISCVII::VLMUL::LMUL_1:
2401 return RISCV::VRRegClassID;
2402 case RISCVII::VLMUL::LMUL_2:
2403 return RISCV::VRM2RegClassID;
2404 case RISCVII::VLMUL::LMUL_4:
2405 return RISCV::VRM4RegClassID;
2406 case RISCVII::VLMUL::LMUL_8:
2407 return RISCV::VRM8RegClassID;
2408 }
2409}
2410
2411unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2412 RISCVII::VLMUL LMUL = getLMUL(VT);
2413 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2414 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2415 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2416 LMUL == RISCVII::VLMUL::LMUL_1) {
2417 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2418 "Unexpected subreg numbering");
2419 return RISCV::sub_vrm1_0 + Index;
2420 }
2421 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2422 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm2_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2427 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm4_0 + Index;
2430 }
2431 llvm_unreachable("Invalid vector type.");
2432}
2433
2434unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2435 if (VT.getVectorElementType() == MVT::i1)
2436 return RISCV::VRRegClassID;
2437 return getRegClassIDForLMUL(getLMUL(VT));
2438}
2439
2440// Attempt to decompose a subvector insert/extract between VecVT and
2441// SubVecVT via subregister indices. Returns the subregister index that
2442// can perform the subvector insert/extract with the given element index, as
2443// well as the index corresponding to any leftover subvectors that must be
2444// further inserted/extracted within the register class for SubVecVT.
2445std::pair<unsigned, unsigned>
2446RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2447 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2448 const RISCVRegisterInfo *TRI) {
2449 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2450 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2451 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2452 "Register classes not ordered");
2453 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2454 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2455 // Try to compose a subregister index that takes us from the incoming
2456 // LMUL>1 register class down to the outgoing one. At each step we halve
2457 // the LMUL:
2458 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2459 // Note that this is not guaranteed to find a subregister index, such as
2460 // when we are extracting from one VR type to another.
2461 unsigned SubRegIdx = RISCV::NoSubRegister;
2462 for (const unsigned RCID :
2463 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2464 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2465 VecVT = VecVT.getHalfNumVectorElementsVT();
2466 bool IsHi =
2467 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2468 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2469 getSubregIndexByMVT(VecVT, IsHi));
2470 if (IsHi)
2471 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2472 }
2473 return {SubRegIdx, InsertExtractIdx};
2474}
2475
2476// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2477// stores for those types.
2478bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2479 return !Subtarget.useRVVForFixedLengthVectors() ||
2480 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2481}
2482
2483bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2484 if (!ScalarTy.isSimple())
2485 return false;
2486 switch (ScalarTy.getSimpleVT().SimpleTy) {
2487 case MVT::iPTR:
2488 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2489 case MVT::i8:
2490 case MVT::i16:
2491 case MVT::i32:
2492 return true;
2493 case MVT::i64:
2494 return Subtarget.hasVInstructionsI64();
2495 case MVT::f16:
2496 return Subtarget.hasVInstructionsF16();
2497 case MVT::f32:
2498 return Subtarget.hasVInstructionsF32();
2499 case MVT::f64:
2500 return Subtarget.hasVInstructionsF64();
2501 default:
2502 return false;
2503 }
2504}
2505
2506
2507unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2508 return NumRepeatedDivisors;
2509}
2510
2511static SDValue getVLOperand(SDValue Op) {
2512 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2513 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2514 "Unexpected opcode");
2515 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2517 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2518 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2519 if (!II)
2520 return SDValue();
2521 return Op.getOperand(II->VLOperand + 1 + HasChain);
2522}
2523
2524static bool useRVVForFixedLengthVectorVT(MVT VT,
2525 const RISCVSubtarget &Subtarget) {
2526 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2527 if (!Subtarget.useRVVForFixedLengthVectors())
2528 return false;
2529
2530 // We only support a set of vector types with a consistent maximum fixed size
2531 // across all supported vector element types to avoid legalization issues.
2532 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2533 // fixed-length vector type we support is 1024 bytes.
2534 if (VT.getFixedSizeInBits() > 1024 * 8)
2535 return false;
2536
2537 unsigned MinVLen = Subtarget.getRealMinVLen();
2538
2539 MVT EltVT = VT.getVectorElementType();
2540
2541 // Don't use RVV for vectors we cannot scalarize if required.
2542 switch (EltVT.SimpleTy) {
2543 // i1 is supported but has different rules.
2544 default:
2545 return false;
2546 case MVT::i1:
2547 // Masks can only use a single register.
2548 if (VT.getVectorNumElements() > MinVLen)
2549 return false;
2550 MinVLen /= 8;
2551 break;
2552 case MVT::i8:
2553 case MVT::i16:
2554 case MVT::i32:
2555 break;
2556 case MVT::i64:
2557 if (!Subtarget.hasVInstructionsI64())
2558 return false;
2559 break;
2560 case MVT::f16:
2561 if (!Subtarget.hasVInstructionsF16Minimal())
2562 return false;
2563 break;
2564 case MVT::f32:
2565 if (!Subtarget.hasVInstructionsF32())
2566 return false;
2567 break;
2568 case MVT::f64:
2569 if (!Subtarget.hasVInstructionsF64())
2570 return false;
2571 break;
2572 }
2573
2574 // Reject elements larger than ELEN.
2575 if (EltVT.getSizeInBits() > Subtarget.getELen())
2576 return false;
2577
2578 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2579 // Don't use RVV for types that don't fit.
2580 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2581 return false;
2582
2583 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2584 // the base fixed length RVV support in place.
2585 if (!VT.isPow2VectorType())
2586 return false;
2587
2588 return true;
2589}
2590
2591bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2592 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2593}
2594
2595// Return the largest legal scalable vector type that matches VT's element type.
2596static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2597 const RISCVSubtarget &Subtarget) {
2598 // This may be called before legal types are setup.
2599 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2600 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2601 "Expected legal fixed length vector!");
2602
2603 unsigned MinVLen = Subtarget.getRealMinVLen();
2604 unsigned MaxELen = Subtarget.getELen();
2605
2606 MVT EltVT = VT.getVectorElementType();
2607 switch (EltVT.SimpleTy) {
2608 default:
2609 llvm_unreachable("unexpected element type for RVV container");
2610 case MVT::i1:
2611 case MVT::i8:
2612 case MVT::i16:
2613 case MVT::i32:
2614 case MVT::i64:
2615 case MVT::f16:
2616 case MVT::f32:
2617 case MVT::f64: {
2618 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2619 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2620 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2621 unsigned NumElts =
2622 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2623 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2624 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2625 return MVT::getScalableVectorVT(EltVT, NumElts);
2626 }
2627 }
2628}
2629
2630static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2631 const RISCVSubtarget &Subtarget) {
2632 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2633 Subtarget);
2634}
2635
2636MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2637 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2638}
2639
2640// Grow V to consume an entire RVV register.
2641static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2642 const RISCVSubtarget &Subtarget) {
2643 assert(VT.isScalableVector() &&
2644 "Expected to convert into a scalable vector!");
2645 assert(V.getValueType().isFixedLengthVector() &&
2646 "Expected a fixed length vector operand!");
2647 SDLoc DL(V);
2648 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2649 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2650}
2651
2652// Shrink V so it's just big enough to maintain a VT's worth of data.
2653static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2654 const RISCVSubtarget &Subtarget) {
2655 assert(VT.isFixedLengthVector() &&
2656 "Expected to convert into a fixed length vector!");
2657 assert(V.getValueType().isScalableVector() &&
2658 "Expected a scalable vector operand!");
2659 SDLoc DL(V);
2660 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2661 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2662}
2663
2664/// Return the mask type suitable for masking the provided vector type. This
2665/// is simply an i1 element-type vector of the same (possibly scalable)
2666/// length.
2667static MVT getMaskTypeFor(MVT VecVT) {
2668 assert(VecVT.isVector());
2669 ElementCount EC = VecVT.getVectorElementCount();
2670 return MVT::getVectorVT(MVT::i1, EC);
2671}
2672
2673/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2674/// vector length VL.
2675static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2676 SelectionDAG &DAG) {
2677 MVT MaskVT = getMaskTypeFor(VecVT);
2678 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2679}
2680
2681static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2682 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2683 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2684 // canonicalize the representation. InsertVSETVLI will pick the immediate
2685 // encoding later if profitable.
2686 const auto [MinVLMAX, MaxVLMAX] =
2687 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2688 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2689 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2690
2691 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2692}
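// Informal example of the canonicalization above, assuming the subtarget pins
// VLEN to exactly 128 (min == max): a fixed v4i32 uses an nxv2i32 container
// whose VLMAX is 4, so a request for 4 elements is emitted as the X0 (VLMAX)
// form rather than the literal constant 4, letting InsertVSETVLI pick the
// cheapest encoding later.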
2693
2694static std::pair<SDValue, SDValue>
2695getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2696 const RISCVSubtarget &Subtarget) {
2697 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2698 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2699 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2700 return {Mask, VL};
2701}
2702
2703static std::pair<SDValue, SDValue>
2704getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2705 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2706 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2707 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2708 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2709 return {Mask, VL};
2710}
2711
2712// Gets the two common "VL" operands: an all-ones mask and the vector length.
2713// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2714// the vector type that the fixed-length vector is contained in. Otherwise if
2715// VecVT is scalable, then ContainerVT should be the same as VecVT.
2716static std::pair<SDValue, SDValue>
2717getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2718 const RISCVSubtarget &Subtarget) {
2719 if (VecVT.isFixedLengthVector())
2720 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2721 Subtarget);
2722 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2723 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2724}
2725
2726SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2727 SelectionDAG &DAG) const {
2728 assert(VecVT.isScalableVector() && "Expected scalable vector");
2729 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2730 VecVT.getVectorElementCount());
2731}
2732
2733std::pair<unsigned, unsigned>
2734RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2735 const RISCVSubtarget &Subtarget) {
2736 assert(VecVT.isScalableVector() && "Expected scalable vector");
2737
2738 unsigned EltSize = VecVT.getScalarSizeInBits();
2739 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2740
2741 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2742 unsigned MaxVLMAX =
2743 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2744
2745 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2746 unsigned MinVLMAX =
2747 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2748
2749 return std::make_pair(MinVLMAX, MaxVLMAX);
2750}
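// Worked example with illustrative numbers: for nxv4i32 the element size is
// 32 and the known minimum size is 128 bits, i.e. LMUL=2. If the subtarget
// guarantees VLEN between 128 and 512, then VLMAX = LMUL * VLEN / SEW, so the
// returned {MinVLMAX, MaxVLMAX} pair is {8, 32}.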
2751
2752// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2753// cases of either are (currently) supported. This can get us into an infinite loop
2754// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2755// as a ..., etc.
2756// Until either (or both) of these can reliably lower any node, reporting that
2757// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2758// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2759// which is not desirable.
2760bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2761 EVT VT, unsigned DefinedValues) const {
2762 return false;
2763}
2764
2765InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2766 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2767 // implementation-defined.
2768 if (!VT.isVector())
2769 return InstructionCost::getInvalid();
2770 unsigned DLenFactor = Subtarget.getDLenFactor();
2771 unsigned Cost;
2772 if (VT.isScalableVector()) {
2773 unsigned LMul;
2774 bool Fractional;
2775 std::tie(LMul, Fractional) =
2776 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2777 if (Fractional)
2778 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2779 else
2780 Cost = (LMul * DLenFactor);
2781 } else {
2782 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2783 }
2784 return Cost;
2785}
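// Rough illustration of the cost model above, assuming DLEN = VLEN/2 (a
// DLenFactor of 2): an LMUL_4 scalable type costs 4 * 2 = 8, a fractional
// LMUL_F2 type costs 2 / 2 = 1, and a 256-bit fixed-length vector on a
// VLEN=128 machine costs ceil(256 / 64) = 4.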
2786
2787
2788/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2789/// is generally quadratic in the number of vreg implied by LMUL. Note that
2790/// operand (index and possibly mask) are handled separately.
2791InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2792 return getLMULCost(VT) * getLMULCost(VT);
2793}
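// For example, at LMUL_2 the estimate above is 2 * 2 = 4 (assuming DLEN ==
// VLEN), reflecting that each destination vreg of a vrgather.vv may need to
// read every source vreg.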
2794
2795/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2796/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2797/// or may track the vrgather.vv cost. It is implementation-dependent.
2798InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2799 return getLMULCost(VT);
2800}
2801
2802/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2803/// for the type VT. (This does not cover the vslide1up or vslide1down
2804/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2805/// or may track the vrgather.vv cost. It is implementation-dependent.
2807 return getLMULCost(VT);
2808}
2809
2810/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2811/// for the type VT. (This does not cover the vslide1up or vslide1down
2812/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2813/// or may track the vrgather.vv cost. It is implementation-dependent.
2814InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2815 return getLMULCost(VT);
2816}
2817
2818static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2819 const RISCVSubtarget &Subtarget) {
2820 // RISC-V FP-to-int conversions saturate to the destination register size, but
2821 // don't produce 0 for nan. We can use a conversion instruction and fix the
2822 // nan case with a compare and a select.
2823 SDValue Src = Op.getOperand(0);
2824
2825 MVT DstVT = Op.getSimpleValueType();
2826 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2827
2828 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2829
2830 if (!DstVT.isVector()) {
2831 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2832 // the result.
2833 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2834 Src.getValueType() == MVT::bf16) {
2835 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2836 }
2837
2838 unsigned Opc;
2839 if (SatVT == DstVT)
2840 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2841 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2842 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2843 else
2844 return SDValue();
2845 // FIXME: Support other SatVTs by clamping before or after the conversion.
2846
2847 SDLoc DL(Op);
2848 SDValue FpToInt = DAG.getNode(
2849 Opc, DL, DstVT, Src,
2850 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2851
2852 if (Opc == RISCVISD::FCVT_WU_RV64)
2853 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2854
2855 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2856 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2857 ISD::CondCode::SETUO);
2858 }
2859
2860 // Vectors.
2861
2862 MVT DstEltVT = DstVT.getVectorElementType();
2863 MVT SrcVT = Src.getSimpleValueType();
2864 MVT SrcEltVT = SrcVT.getVectorElementType();
2865 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2866 unsigned DstEltSize = DstEltVT.getSizeInBits();
2867
2868 // Only handle saturating to the destination type.
2869 if (SatVT != DstEltVT)
2870 return SDValue();
2871
2872 // FIXME: Don't support narrowing by more than 1 steps for now.
2873 if (SrcEltSize > (2 * DstEltSize))
2874 return SDValue();
2875
2876 MVT DstContainerVT = DstVT;
2877 MVT SrcContainerVT = SrcVT;
2878 if (DstVT.isFixedLengthVector()) {
2879 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2880 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2881 assert(DstContainerVT.getVectorElementCount() ==
2882 SrcContainerVT.getVectorElementCount() &&
2883 "Expected same element count");
2884 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2885 }
2886
2887 SDLoc DL(Op);
2888
2889 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2890
2891 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2892 {Src, Src, DAG.getCondCode(ISD::SETNE),
2893 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2894
2895 // Need to widen by more than 1 step, promote the FP type, then do a widening
2896 // convert.
2897 if (DstEltSize > (2 * SrcEltSize)) {
2898 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2899 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2900 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2901 }
2902
2903 unsigned RVVOpc =
2904 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2905 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2906
2907 SDValue SplatZero = DAG.getNode(
2908 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2909 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2910 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2911 Res, DAG.getUNDEF(DstContainerVT), VL);
2912
2913 if (DstVT.isFixedLengthVector())
2914 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2915
2916 return Res;
2917}
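// Sketch of the scalar strategy above (illustrative): the hardware conversion
// already clamps out-of-range inputs, so the only fixup needed is the NaN
// case, which the select-on-unordered turns into 0; e.g. saturating-convert
// of NaN yields 0 while a huge finite input stays at the clamped maximum.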
2918
2919static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2920 switch (Opc) {
2921 case ISD::FROUNDEVEN:
2922 case ISD::STRICT_FROUNDEVEN:
2923 case ISD::VP_FROUNDEVEN:
2924 return RISCVFPRndMode::RNE;
2925 case ISD::FTRUNC:
2926 case ISD::STRICT_FTRUNC:
2927 case ISD::VP_FROUNDTOZERO:
2928 return RISCVFPRndMode::RTZ;
2929 case ISD::FFLOOR:
2930 case ISD::STRICT_FFLOOR:
2931 case ISD::VP_FFLOOR:
2932 return RISCVFPRndMode::RDN;
2933 case ISD::FCEIL:
2934 case ISD::STRICT_FCEIL:
2935 case ISD::VP_FCEIL:
2936 return RISCVFPRndMode::RUP;
2937 case ISD::FROUND:
2938 case ISD::STRICT_FROUND:
2939 case ISD::VP_FROUND:
2940 return RISCVFPRndMode::RMM;
2941 case ISD::FRINT:
2942 return RISCVFPRndMode::DYN;
2943 }
2944
2945 return RISCVFPRndMode::Invalid;
2946}
2947
2948// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2949// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2950// the integer domain and back. Taking care to avoid converting values that are
2951// nan or already correct.
2952static SDValue
2953lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2954 const RISCVSubtarget &Subtarget) {
2955 MVT VT = Op.getSimpleValueType();
2956 assert(VT.isVector() && "Unexpected type");
2957
2958 SDLoc DL(Op);
2959
2960 SDValue Src = Op.getOperand(0);
2961
2962 MVT ContainerVT = VT;
2963 if (VT.isFixedLengthVector()) {
2964 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2965 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2966 }
2967
2968 SDValue Mask, VL;
2969 if (Op->isVPOpcode()) {
2970 Mask = Op.getOperand(1);
2971 if (VT.isFixedLengthVector())
2972 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2973 Subtarget);
2974 VL = Op.getOperand(2);
2975 } else {
2976 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2977 }
2978
2979 // Freeze the source since we are increasing the number of uses.
2980 Src = DAG.getFreeze(Src);
2981
2982 // We do the conversion on the absolute value and fix the sign at the end.
2983 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2984
2985 // Determine the largest integer that can be represented exactly. This and
2986 // values larger than it don't have any fractional bits so don't need to
2987 // be converted.
2988 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2989 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2990 APFloat MaxVal = APFloat(FltSem);
2991 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2992 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2993 SDValue MaxValNode =
2994 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2995 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2996 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2997
2998 // If abs(Src) was larger than MaxVal or nan, keep it.
2999 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3000 Mask =
3001 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3002 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3003 Mask, Mask, VL});
3004
3005 // Truncate to integer and convert back to FP.
3006 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3007 MVT XLenVT = Subtarget.getXLenVT();
3008 SDValue Truncated;
3009
3010 switch (Op.getOpcode()) {
3011 default:
3012 llvm_unreachable("Unexpected opcode");
3013 case ISD::FCEIL:
3014 case ISD::VP_FCEIL:
3015 case ISD::FFLOOR:
3016 case ISD::VP_FFLOOR:
3017 case ISD::FROUND:
3018 case ISD::FROUNDEVEN:
3019 case ISD::VP_FROUND:
3020 case ISD::VP_FROUNDEVEN:
3021 case ISD::VP_FROUNDTOZERO: {
3022 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3023 assert(FRM != RISCVFPRndMode::Invalid);
3024 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3025 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3026 break;
3027 }
3028 case ISD::FTRUNC:
3029 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3030 Mask, VL);
3031 break;
3032 case ISD::FRINT:
3033 case ISD::VP_FRINT:
3034 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3035 break;
3036 case ISD::FNEARBYINT:
3037 case ISD::VP_FNEARBYINT:
3038 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3039 Mask, VL);
3040 break;
3041 }
3042
3043 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3044 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3045 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3046 Mask, VL);
3047
3048 // Restore the original sign so that -0.0 is preserved.
3049 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3050 Src, Src, Mask, VL);
3051
3052 if (!VT.isFixedLengthVector())
3053 return Truncated;
3054
3055 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3056}
3057
3058// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3059// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN in the source to
3060// qNaN and converting the new source to integer and back to FP.
3061static SDValue
3062lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3063 const RISCVSubtarget &Subtarget) {
3064 SDLoc DL(Op);
3065 MVT VT = Op.getSimpleValueType();
3066 SDValue Chain = Op.getOperand(0);
3067 SDValue Src = Op.getOperand(1);
3068
3069 MVT ContainerVT = VT;
3070 if (VT.isFixedLengthVector()) {
3071 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3072 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3073 }
3074
3075 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3076
3077 // Freeze the source since we are increasing the number of uses.
3078 Src = DAG.getFreeze(Src);
3079
3080 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3081 MVT MaskVT = Mask.getSimpleValueType();
3082 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3083 DAG.getVTList(MaskVT, MVT::Other),
3084 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3085 DAG.getUNDEF(MaskVT), Mask, VL});
3086 Chain = Unorder.getValue(1);
3087 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3088 DAG.getVTList(ContainerVT, MVT::Other),
3089 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3090 Chain = Src.getValue(1);
3091
3092 // We do the conversion on the absolute value and fix the sign at the end.
3093 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3094
3095 // Determine the largest integer that can be represented exactly. This and
3096 // values larger than it don't have any fractional bits so don't need to
3097 // be converted.
3098 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3099 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3100 APFloat MaxVal = APFloat(FltSem);
3101 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3102 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3103 SDValue MaxValNode =
3104 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3105 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3106 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3107
3108 // If abs(Src) was larger than MaxVal or nan, keep it.
3109 Mask = DAG.getNode(
3110 RISCVISD::SETCC_VL, DL, MaskVT,
3111 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3112
3113 // Truncate to integer and convert back to FP.
3114 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3115 MVT XLenVT = Subtarget.getXLenVT();
3116 SDValue Truncated;
3117
3118 switch (Op.getOpcode()) {
3119 default:
3120 llvm_unreachable("Unexpected opcode");
3121 case ISD::STRICT_FCEIL:
3122 case ISD::STRICT_FFLOOR:
3123 case ISD::STRICT_FROUND:
3124 case ISD::STRICT_FROUNDEVEN: {
3125 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3126 assert(FRM != RISCVFPRndMode::Invalid);
3127 Truncated = DAG.getNode(
3128 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3129 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3130 break;
3131 }
3132 case ISD::STRICT_FTRUNC:
3133 Truncated =
3134 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3135 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3136 break;
3137 case ISD::STRICT_FNEARBYINT:
3138 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3139 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3140 Mask, VL);
3141 break;
3142 }
3143 Chain = Truncated.getValue(1);
3144
3145 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3146 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3147 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3148 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3149 Truncated, Mask, VL);
3150 Chain = Truncated.getValue(1);
3151 }
3152
3153 // Restore the original sign so that -0.0 is preserved.
3154 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3155 Src, Src, Mask, VL);
3156
3157 if (VT.isFixedLengthVector())
3158 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3159 return DAG.getMergeValues({Truncated, Chain}, DL);
3160}
3161
3162static SDValue
3163lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3164 const RISCVSubtarget &Subtarget) {
3165 MVT VT = Op.getSimpleValueType();
3166 if (VT.isVector())
3167 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3168
3169 if (DAG.shouldOptForSize())
3170 return SDValue();
3171
3172 SDLoc DL(Op);
3173 SDValue Src = Op.getOperand(0);
3174
3175 // Create an integer the size of the mantissa with the MSB set. This and all
3176 // values larger than it don't have any fractional bits so don't need to be
3177 // converted.
3178 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3179 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3180 APFloat MaxVal = APFloat(FltSem);
3181 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3182 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3183 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3184
3185 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3186 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3187 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3188}
3189
3190// Expand vector LRINT and LLRINT by converting to the integer domain.
3191static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3192 const RISCVSubtarget &Subtarget) {
3193 MVT VT = Op.getSimpleValueType();
3194 assert(VT.isVector() && "Unexpected type");
3195
3196 SDLoc DL(Op);
3197 SDValue Src = Op.getOperand(0);
3198 MVT ContainerVT = VT;
3199
3200 if (VT.isFixedLengthVector()) {
3201 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3202 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3203 }
3204
3205 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3206 SDValue Truncated =
3207 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3208
3209 if (!VT.isFixedLengthVector())
3210 return Truncated;
3211
3212 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3213}
3214
3215static SDValue
3216getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3217 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3218 SDValue Offset, SDValue Mask, SDValue VL,
3219 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3220 if (Merge.isUndef())
3221 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3222 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3223 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3224 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3225}
3226
3227static SDValue
3228getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3229 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3230 SDValue VL,
3231 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3232 if (Merge.isUndef())
3233 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3234 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3235 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3236 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3237}
3238
3239static MVT getLMUL1VT(MVT VT) {
3241 "Unexpected vector MVT");
3245}
3246
3247struct VIDSequence {
3248 int64_t StepNumerator;
3249 unsigned StepDenominator;
3250 int64_t Addend;
3251};
3252
3253static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3254 uint32_t BitWidth) {
3255 // We will use a SINT_TO_FP to materialize this constant so we should use a
3256 // signed APSInt here.
3257 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3258 // We use an arbitrary rounding mode here. If a floating-point is an exact
3259 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3260 // the rounding mode changes the output value, then it is not an exact
3261 // integer.
3262 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3263 bool IsExact;
3264 // If it is out of signed integer range, it will return an invalid operation.
3265 // If it is not an exact integer, IsExact is false.
3266 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3267 APFloatBase::opInvalidOp) ||
3268 !IsExact)
3269 return std::nullopt;
3270 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3271}
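// For example, 3.0 converts exactly and yields 3 (as a zero-extended
// BitWidth-wide value), while 2.5 is inexact under any rounding mode and
// yields std::nullopt, as does a value that overflows the signed range.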
3272
3273// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3274// to the (non-zero) step S and start value X. This can be then lowered as the
3275// RVV sequence (VID * S) + X, for example.
3276// The step S is represented as an integer numerator divided by a positive
3277// denominator. Note that the implementation currently only identifies
3278// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3279// cannot detect 2/3, for example.
3280// Note that this method will also match potentially unappealing index
3281// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3282// determine whether this is worth generating code for.
3283static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3284 unsigned EltSizeInBits) {
3285 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3286 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3287 return std::nullopt;
3288 bool IsInteger = Op.getValueType().isInteger();
3289
3290 std::optional<unsigned> SeqStepDenom;
3291 std::optional<int64_t> SeqStepNum, SeqAddend;
3292 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3293 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3294
3295 // First extract the ops into a list of constant integer values. This may not
3296 // be possible for floats if they're not all representable as integers.
3297 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3298 const unsigned OpSize = Op.getScalarValueSizeInBits();
3299 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3300 if (Elt.isUndef()) {
3301 Elts[Idx] = std::nullopt;
3302 continue;
3303 }
3304 if (IsInteger) {
3305 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3306 } else {
3307 auto ExactInteger =
3308 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3309 if (!ExactInteger)
3310 return std::nullopt;
3311 Elts[Idx] = *ExactInteger;
3312 }
3313 }
3314
3315 for (auto [Idx, Elt] : enumerate(Elts)) {
3316 // Assume undef elements match the sequence; we just have to be careful
3317 // when interpolating across them.
3318 if (!Elt)
3319 continue;
3320
3321 if (PrevElt) {
3322 // Calculate the step since the last non-undef element, and ensure
3323 // it's consistent across the entire sequence.
3324 unsigned IdxDiff = Idx - PrevElt->second;
3325 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3326
3327 // A zero value difference means that we're somewhere in the middle
3328 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3329 // step change before evaluating the sequence.
3330 if (ValDiff == 0)
3331 continue;
3332
3333 int64_t Remainder = ValDiff % IdxDiff;
3334 // Normalize the step if it's greater than 1.
3335 if (Remainder != ValDiff) {
3336 // The difference must cleanly divide the element span.
3337 if (Remainder != 0)
3338 return std::nullopt;
3339 ValDiff /= IdxDiff;
3340 IdxDiff = 1;
3341 }
3342
3343 if (!SeqStepNum)
3344 SeqStepNum = ValDiff;
3345 else if (ValDiff != SeqStepNum)
3346 return std::nullopt;
3347
3348 if (!SeqStepDenom)
3349 SeqStepDenom = IdxDiff;
3350 else if (IdxDiff != *SeqStepDenom)
3351 return std::nullopt;
3352 }
3353
3354 // Record this non-undef element for later.
3355 if (!PrevElt || PrevElt->first != *Elt)
3356 PrevElt = std::make_pair(*Elt, Idx);
3357 }
3358
3359 // We need to have logged a step for this to count as a legal index sequence.
3360 if (!SeqStepNum || !SeqStepDenom)
3361 return std::nullopt;
3362
3363 // Loop back through the sequence and validate elements we might have skipped
3364 // while waiting for a valid step. While doing this, log any sequence addend.
3365 for (auto [Idx, Elt] : enumerate(Elts)) {
3366 if (!Elt)
3367 continue;
3368 uint64_t ExpectedVal =
3369 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3370 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3371 if (!SeqAddend)
3372 SeqAddend = Addend;
3373 else if (Addend != SeqAddend)
3374 return std::nullopt;
3375 }
3376
3377 assert(SeqAddend && "Must have an addend if we have a step");
3378
3379 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3380}
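// A couple of illustrative matches for the routine above: <1, 3, 5, 7>
// yields {StepNumerator=2, StepDenominator=1, Addend=1}, and <0, 0, 1, 1>
// yields {1, 2, 0} since element i is (i * 1) / 2 + 0. A sequence such as
// <0, 1, 3, 4> has no consistent step and returns std::nullopt.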
3381
3382// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3383// and lower it as a VRGATHER_VX_VL from the source vector.
3384static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3385 SelectionDAG &DAG,
3386 const RISCVSubtarget &Subtarget) {
3387 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3388 return SDValue();
3389 SDValue Vec = SplatVal.getOperand(0);
3390 // Only perform this optimization on vectors of the same size for simplicity.
3391 // Don't perform this optimization for i1 vectors.
3392 // FIXME: Support i1 vectors, maybe by promoting to i8?
3393 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3394 return SDValue();
3395 SDValue Idx = SplatVal.getOperand(1);
3396 // The index must be a legal type.
3397 if (Idx.getValueType() != Subtarget.getXLenVT())
3398 return SDValue();
3399
3400 MVT ContainerVT = VT;
3401 if (VT.isFixedLengthVector()) {
3402 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3403 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3404 }
3405
3406 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3407
3408 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3409 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3410
3411 if (!VT.isFixedLengthVector())
3412 return Gather;
3413
3414 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3415}
3416
3417
3418/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3419/// which constitute a large proportion of the elements. In such cases we can
3420/// splat a vector with the dominant element and make up the shortfall with
3421/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3422/// Note that this includes vectors of 2 elements by association. The
3423/// upper-most element is the "dominant" one, allowing us to use a splat to
3424/// "insert" the upper element, and an insert of the lower element at position
3425/// 0, which improves codegen.
3426static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3427 const RISCVSubtarget &Subtarget) {
3428 MVT VT = Op.getSimpleValueType();
3429 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3430
3431 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3432
3433 SDLoc DL(Op);
3434 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3435
3436 MVT XLenVT = Subtarget.getXLenVT();
3437 unsigned NumElts = Op.getNumOperands();
3438
3439 SDValue DominantValue;
3440 unsigned MostCommonCount = 0;
3441 DenseMap<SDValue, unsigned> ValueCounts;
3442 unsigned NumUndefElts =
3443 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3444
3445 // Track the number of scalar loads we know we'd be inserting, estimated as
3446 // any non-zero floating-point constant. Other kinds of element are either
3447 // already in registers or are materialized on demand. The threshold at which
3448 // a vector load is more desirable than several scalar materializion and
3449 // vector-insertion instructions is not known.
3450 unsigned NumScalarLoads = 0;
3451
3452 for (SDValue V : Op->op_values()) {
3453 if (V.isUndef())
3454 continue;
3455
3456 ValueCounts.insert(std::make_pair(V, 0));
3457 unsigned &Count = ValueCounts[V];
3458 if (0 == Count)
3459 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3460 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3461
3462 // Is this value dominant? In case of a tie, prefer the highest element as
3463 // it's cheaper to insert near the beginning of a vector than it is at the
3464 // end.
3465 if (++Count >= MostCommonCount) {
3466 DominantValue = V;
3467 MostCommonCount = Count;
3468 }
3469 }
3470
3471 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3472 unsigned NumDefElts = NumElts - NumUndefElts;
3473 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3474
3475 // Don't perform this optimization when optimizing for size, since
3476 // materializing elements and inserting them tends to cause code bloat.
3477 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3478 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3479 ((MostCommonCount > DominantValueCountThreshold) ||
3480 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3481 // Start by splatting the most common element.
3482 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3483
3484 DenseSet<SDValue> Processed{DominantValue};
3485
3486 // We can handle an insert into the last element (of a splat) via
3487 // v(f)slide1down. This is slightly better than the vslideup insert
3488 // lowering as it avoids the need for a vector group temporary. It
3489 // is also better than using vmerge.vx as it avoids the need to
3490 // materialize the mask in a vector register.
3491 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3492 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3493 LastOp != DominantValue) {
3494 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3495 auto OpCode =
3496 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3497 if (!VT.isFloatingPoint())
3498 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3499 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3500 LastOp, Mask, VL);
3501 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3502 Processed.insert(LastOp);
3503 }
3504
3505 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3506 for (const auto &OpIdx : enumerate(Op->ops())) {
3507 const SDValue &V = OpIdx.value();
3508 if (V.isUndef() || !Processed.insert(V).second)
3509 continue;
3510 if (ValueCounts[V] == 1) {
3511 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3512 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3513 } else {
3514 // Blend in all instances of this value using a VSELECT, using a
3515 // mask where each bit signals whether that element is the one
3516 // we're after.
3517 SmallVector<SDValue> Ops;
3518 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3519 return DAG.getConstant(V == V1, DL, XLenVT);
3520 });
3521 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3522 DAG.getBuildVector(SelMaskTy, DL, Ops),
3523 DAG.getSplatBuildVector(VT, DL, V), Vec);
3524 }
3525 }
3526
3527 return Vec;
3528 }
3529
3530 return SDValue();
3531}
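// Informal example of the strategy above: for <4 x i32> <7, 7, 7, 5>, the
// dominant value 7 is splatted and the single odd element 5 is then folded
// in, here via the v(f)slide1down path since it is the last element. A
// build_vector with no sufficiently dominant value falls through and returns
// SDValue().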
3532
3533static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3534 const RISCVSubtarget &Subtarget) {
3535 MVT VT = Op.getSimpleValueType();
3536 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3537
3538 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3539
3540 SDLoc DL(Op);
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 MVT XLenVT = Subtarget.getXLenVT();
3544 unsigned NumElts = Op.getNumOperands();
3545
3546 if (VT.getVectorElementType() == MVT::i1) {
3547 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3548 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3549 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3550 }
3551
3552 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3553 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3554 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3555 }
3556
3557 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3558 // scalar integer chunks whose bit-width depends on the number of mask
3559 // bits and XLEN.
3560 // First, determine the most appropriate scalar integer type to use. This
3561 // is at most XLenVT, but may be shrunk to a smaller vector element type
3562 // according to the size of the final vector - use i8 chunks rather than
3563 // XLenVT if we're producing a v8i1. This results in more consistent
3564 // codegen across RV32 and RV64.
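// As a rough illustration of this scheme: a constant v8i1 mask is packed
// into a single i8 scalar (held in a v1i8), then bitcast through v8i1, while
// a v64i1 mask on RV32 would be packed into two i32 chunks before the final
// bitcast.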
3565 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3566 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3567 // If we have to use more than one INSERT_VECTOR_ELT then this
3568 // optimization is likely to increase code size; avoid performing it in
3569 // such a case. We can use a load from a constant pool in this case.
3570 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3571 return SDValue();
3572 // Now we can create our integer vector type. Note that it may be larger
3573 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3574 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3575 MVT IntegerViaVecVT =
3576 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3577 IntegerViaVecElts);
3578
3579 uint64_t Bits = 0;
3580 unsigned BitPos = 0, IntegerEltIdx = 0;
3581 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3582
3583 for (unsigned I = 0; I < NumElts;) {
3584 SDValue V = Op.getOperand(I);
3585 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3586 Bits |= ((uint64_t)BitValue << BitPos);
3587 ++BitPos;
3588 ++I;
3589
3590 // Once we accumulate enough bits to fill our scalar type or process the
3591 // last element, insert into our vector and clear our accumulated data.
3592 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3593 if (NumViaIntegerBits <= 32)
3594 Bits = SignExtend64<32>(Bits);
3595 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3596 Elts[IntegerEltIdx] = Elt;
3597 Bits = 0;
3598 BitPos = 0;
3599 IntegerEltIdx++;
3600 }
3601 }
3602
3603 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3604
3605 if (NumElts < NumViaIntegerBits) {
3606 // If we're producing a smaller vector than our minimum legal integer
3607 // type, bitcast to the equivalent (known-legal) mask type, and extract
3608 // our final mask.
3609 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3610 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3611 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3612 DAG.getConstant(0, DL, XLenVT));
3613 } else {
3614 // Else we must have produced an integer type with the same size as the
3615 // mask type; bitcast for the final result.
3616 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3617 Vec = DAG.getBitcast(VT, Vec);
3618 }
3619
3620 return Vec;
3621 }
3622
3623 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3624 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3625 : RISCVISD::VMV_V_X_VL;
3626 if (!VT.isFloatingPoint())
3627 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3628 Splat =
3629 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3630 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3631 }
3632
3633 // Try and match index sequences, which we can lower to the vid instruction
3634 // with optional modifications. An all-undef vector is matched by
3635 // getSplatValue, above.
3636 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3637 int64_t StepNumerator = SimpleVID->StepNumerator;
3638 unsigned StepDenominator = SimpleVID->StepDenominator;
3639 int64_t Addend = SimpleVID->Addend;
3640
3641 assert(StepNumerator != 0 && "Invalid step");
3642 bool Negate = false;
3643 int64_t SplatStepVal = StepNumerator;
3644 unsigned StepOpcode = ISD::MUL;
3645 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3646 // anyway as the shift of 63 won't fit in uimm5.
3647 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3648 isPowerOf2_64(std::abs(StepNumerator))) {
3649 Negate = StepNumerator < 0;
3650 StepOpcode = ISD::SHL;
3651 SplatStepVal = Log2_64(std::abs(StepNumerator));
3652 }
3653
3654 // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3655 // threshold since it's the immediate value many RVV instructions accept.
3656 // There is no vmul.vi instruction so ensure multiply constant can fit in
3657 // a single addi instruction.
3658 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3659 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3660 isPowerOf2_32(StepDenominator) &&
3661 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3662 MVT VIDVT =
3663 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3664 MVT VIDContainerVT =
3665 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3666 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3667 // Convert right out of the scalable type so we can use standard ISD
3668 // nodes for the rest of the computation. If we used scalable types with
3669 // these, we'd lose the fixed-length vector info and generate worse
3670 // vsetvli code.
3671 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3672 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3673 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3674 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3675 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3676 }
3677 if (StepDenominator != 1) {
3678 SDValue SplatStep =
3679 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3680 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3681 }
3682 if (Addend != 0 || Negate) {
3683 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3684 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3685 VID);
3686 }
3687 if (VT.isFloatingPoint()) {
3688 // TODO: Use vfwcvt to reduce register pressure.
3689 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3690 }
3691 return VID;
3692 }
3693 }
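// As a concrete illustration of the VID path: the constant vector
// <0, 2, 4, 6> matches {StepNumerator=2, StepDenominator=1, Addend=0} and
// can be emitted roughly as vid.v followed by vsll.vi with shift 1, with an
// extra add/shift only when the addend or denominator is non-trivial.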
3694
3695 // For very small build_vectors, use a single scalar insert of a constant.
3696 // TODO: Base this on constant rematerialization cost, not size.
3697 const unsigned EltBitSize = VT.getScalarSizeInBits();
3698 if (VT.getSizeInBits() <= 32 &&
3699 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3700 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3701 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3702 "Unexpected sequence type");
3703 // If we can use the original VL with the modified element type, this
3704 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3705 // be moved into InsertVSETVLI?
3706 unsigned ViaVecLen =
3707 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3708 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3709
3710 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3711 uint64_t SplatValue = 0;
3712 // Construct the amalgamated value at this larger vector type.
3713 for (const auto &OpIdx : enumerate(Op->op_values())) {
3714 const auto &SeqV = OpIdx.value();
3715 if (!SeqV.isUndef())
3716 SplatValue |=
3717 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3718 }
3719
3720 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3721 // achieve better constant materialization.
3722 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3723 SplatValue = SignExtend64<32>(SplatValue);
3724
3725 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3726 DAG.getUNDEF(ViaVecVT),
3727 DAG.getConstant(SplatValue, DL, XLenVT),
3728 DAG.getVectorIdxConstant(0, DL));
3729 if (ViaVecLen != 1)
3730 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3731 MVT::getVectorVT(ViaIntVT, 1), Vec,
3732 DAG.getConstant(0, DL, XLenVT));
3733 return DAG.getBitcast(VT, Vec);
3734 }
3735
3736
3737 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3738 // when re-interpreted as a vector with a larger element type. For example,
3739 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3740 // could be instead splat as
3741 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3742 // TODO: This optimization could also work on non-constant splats, but it
3743 // would require bit-manipulation instructions to construct the splat value.
3744 SmallVector<SDValue> Sequence;
3745 const auto *BV = cast<BuildVectorSDNode>(Op);
3746 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3747 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3748 BV->getRepeatedSequence(Sequence) &&
3749 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3750 unsigned SeqLen = Sequence.size();
3751 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3752 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3753 ViaIntVT == MVT::i64) &&
3754 "Unexpected sequence type");
3755
3756 // If we can use the original VL with the modified element type, this
3757 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3758 // be moved into InsertVSETVLI?
3759 const unsigned RequiredVL = NumElts / SeqLen;
3760 const unsigned ViaVecLen =
3761 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3762 NumElts : RequiredVL;
3763 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3764
3765 unsigned EltIdx = 0;
3766 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3767 uint64_t SplatValue = 0;
3768 // Construct the amalgamated value which can be splatted as this larger
3769 // vector type.
3770 for (const auto &SeqV : Sequence) {
3771 if (!SeqV.isUndef())
3772 SplatValue |=
3773 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3774 EltIdx++;
3775 }
3776
3777 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3778 // achieve better constant materialization.
3779 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3780 SplatValue = SignExtend64<32>(SplatValue);
3781
3782 // Since we can't introduce illegal i64 types at this stage, we can only
3783 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3784 // way we can use RVV instructions to splat.
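// For example, the i64 value 0xFFFFFFFF80000000 passes the isInt<32> check
// below and can be splatted with a sign-extending vmv.v.x, while
// 0x0000000180000000 cannot and falls through to the generic lowering.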
3785 assert((ViaIntVT.bitsLE(XLenVT) ||
3786 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3787 "Unexpected bitcast sequence");
3788 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3789 SDValue ViaVL =
3790 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3791 MVT ViaContainerVT =
3792 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3793 SDValue Splat =
3794 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3795 DAG.getUNDEF(ViaContainerVT),
3796 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3797 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3798 if (ViaVecLen != RequiredVL)
3799 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3800 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3801 DAG.getConstant(0, DL, XLenVT));
3802 return DAG.getBitcast(VT, Splat);
3803 }
3804 }
3805
3806 // If the number of sign bits allows, see if we can lower as a <N x i8>.
3807 // Our main goal here is to reduce LMUL (and thus work) required to
3808 // build the constant, but we will also narrow if the resulting
3809 // narrow vector is known to materialize cheaply.
3810 // TODO: We really should be costing the smaller vector. There are
3811 // profitable cases this misses.
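// For example, v4i16 <3, -2, 1, 0> has at least 14 sign bits in every
// element, so it can be built as a v4i8 vector and sign-extended back to
// v4i16.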
3812 if (EltBitSize > 8 && VT.isInteger() &&
3813 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3814 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3815 if (EltBitSize - SignBits < 8) {
3816 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3817 DL, Op->ops());
3818 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3819 Source, DAG, Subtarget);
3820 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3821 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3822 }
3823 }
3824
3825 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3826 return Res;
3827
3828 // For constant vectors, use generic constant pool lowering. Otherwise,
3829 // we'd have to materialize constants in GPRs just to move them into the
3830 // vector.
3831 return SDValue();
3832}
3833
3834static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3835 const RISCVSubtarget &Subtarget) {
3836 MVT VT = Op.getSimpleValueType();
3837 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3838
3839 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3840 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3841 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3842
3843 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3844
3845 SDLoc DL(Op);
3846 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3847
3848 MVT XLenVT = Subtarget.getXLenVT();
3849
3850 if (VT.getVectorElementType() == MVT::i1) {
3851 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3852 // vector type, we have a legal equivalently-sized i8 type, so we can use
3853 // that.
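// For example, the v4i1 build_vector <1,0,1,1> is built as a v4i8 vector,
// masked with 1, and compared not-equal against zero to form the mask.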
3854 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3855 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3856
3857 SDValue WideVec;
3858 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3859 // For a splat, perform a scalar truncate before creating the wider
3860 // vector.
3861 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3862 DAG.getConstant(1, DL, Splat.getValueType()));
3863 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3864 } else {
3865 SmallVector<SDValue, 8> Ops(Op->op_values());
3866 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3867 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3868 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3869 }
3870
3871 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3872 }
3873
3874 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3875 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3876 return Gather;
3877 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3878 : RISCVISD::VMV_V_X_VL;
3879 if (!VT.isFloatingPoint())
3880 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3881 Splat =
3882 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3883 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3884 }
3885
3886 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3887 return Res;
3888
3889 // If we're compiling for an exact VLEN value, we can split our work per
3890 // register in the register group.
3891 if (const auto VLen = Subtarget.getRealVLen();
3892 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3893 MVT ElemVT = VT.getVectorElementType();
3894 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3895 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3896 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3897 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3898 assert(M1VT == getLMUL1VT(M1VT));
3899
3900 // The following semantically builds up a fixed length concat_vector
3901 // of the component build_vectors. We eagerly lower to scalable and
3902 // insert_subvector here to avoid DAG combining it back to a large
3903 // build_vector.
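// For example, with VLEN=128 a v8i64 build_vector (LMUL=4) is emitted as four
// v2i64 build_vectors covering elements [0,1], [2,3], [4,5] and [6,7].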
3904 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3905 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3906 SDValue Vec = DAG.getUNDEF(ContainerVT);
3907 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3908 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3909 SDValue SubBV =
3910 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3911 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3912 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3913 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3914 DAG.getVectorIdxConstant(InsertIdx, DL));
3915 }
3916 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3917 }
3918
3919 // For m1 vectors, if we have non-undef values in both halves of our vector,
3920 // split the vector into low and high halves, build them separately, then
3921 // use a vselect to combine them. For long vectors, this cuts the critical
3922 // path of the vslide1down sequence in half, and gives us an opportunity
3923 // to special case each half independently. Note that we don't change the
3924 // length of the sub-vectors here, so if both halves fall back to the generic
3925 // vslide1down path, we should be able to fold the vselect into the final
3926 // vslidedown (for the undef tail) for the first half w/ masking.
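// For example, a fully defined <a,b,c,d,e,f,g,h> is built as <a,b,c,d,u,u,u,u>
// and <u,u,u,u,e,f,g,h> and recombined with the select mask <1,1,1,1,0,0,0,0>.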
3927 unsigned NumElts = VT.getVectorNumElements();
3928 unsigned NumUndefElts =
3929 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3930 unsigned NumDefElts = NumElts - NumUndefElts;
3931 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3932 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3933 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3934 SmallVector<SDValue> MaskVals;
3935 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3936 SubVecAOps.reserve(NumElts);
3937 SubVecBOps.reserve(NumElts);
3938 for (unsigned i = 0; i < NumElts; i++) {
3939 SDValue Elem = Op->getOperand(i);
3940 if (i < NumElts / 2) {
3941 SubVecAOps.push_back(Elem);
3942 SubVecBOps.push_back(UndefElem);
3943 } else {
3944 SubVecAOps.push_back(UndefElem);
3945 SubVecBOps.push_back(Elem);
3946 }
3947 bool SelectMaskVal = (i < NumElts / 2);
3948 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3949 }
3950 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3951 MaskVals.size() == NumElts);
3952
3953 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3954 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3955 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3956 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3957 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3958 }
3959
3960 // Cap the cost at a value linear in the number of elements in the vector.
3961 // The default lowering is to use the stack; the vector store + scalar loads
3962 // are linear in VL. However, at high LMULs vslide1down and vslidedown end up
3963 // being (at least) linear in LMUL. As a result, using the vslidedown
3964 // lowering for every element ends up being VL*LMUL.
3965 // TODO: Should we be directly costing the stack alternative? Doing so might
3966 // give us a more accurate upper bound.
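// For example, an 8 element build_vector has a budget of 16; at LMUL=4 each
// slide costs 4, so at most four slides fit before we fall back to the
// default (stack) lowering.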
3967 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3968
3969 // TODO: unify with TTI getSlideCost.
3970 InstructionCost PerSlideCost = 1;
3971 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3972 default: break;
3973 case RISCVII::VLMUL::LMUL_2:
3974 PerSlideCost = 2;
3975 break;
3976 case RISCVII::VLMUL::LMUL_4:
3977 PerSlideCost = 4;
3978 break;
3979 case RISCVII::VLMUL::LMUL_8:
3980 PerSlideCost = 8;
3981 break;
3982 }
3983
3984 // TODO: Should we be using the build instseq then cost + evaluate scheme
3985 // we use for integer constants here?
3986 unsigned UndefCount = 0;
3987 for (const SDValue &V : Op->ops()) {
3988 if (V.isUndef()) {
3989 UndefCount++;
3990 continue;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 UndefCount = 0;
3995 }
3996 LinearBudget -= PerSlideCost;
3997 }
3998 if (UndefCount) {
3999 LinearBudget -= PerSlideCost;
4000 }
4001
4002 if (LinearBudget < 0)
4003 return SDValue();
4004
4005 assert((!VT.isFloatingPoint() ||
4006 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4007 "Illegal type which will result in reserved encoding");
4008
4009 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4010
4011 SDValue Vec;
4012 UndefCount = 0;
4013 for (SDValue V : Op->ops()) {
4014 if (V.isUndef()) {
4015 UndefCount++;
4016 continue;
4017 }
4018
4019 // Start our sequence with a TA splat in the hopes that hardware is able to
4020 // recognize there's no dependency on the prior value of our temporary
4021 // register.
4022 if (!Vec) {
4023 Vec = DAG.getSplatVector(VT, DL, V);
4024 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4025 UndefCount = 0;
4026 continue;
4027 }
4028
4029 if (UndefCount) {
4030 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4031 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4032 Vec, Offset, Mask, VL, Policy);
4033 UndefCount = 0;
4034 }
4035 auto OpCode =
4036 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4037 if (!VT.isFloatingPoint())
4038 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4039 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4040 V, Mask, VL);
4041 }
4042 if (UndefCount) {
4043 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4044 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4045 Vec, Offset, Mask, VL, Policy);
4046 }
4047 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4048}
4049
4050static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4051 SDValue Lo, SDValue Hi, SDValue VL,
4052 SelectionDAG &DAG) {
4053 if (!Passthru)
4054 Passthru = DAG.getUNDEF(VT);
4055 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4056 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4057 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4058 // If Hi is all copies of Lo's sign bit (i.e. the pair is a sign-extended
4059 // 32-bit value), lower this as a custom node in order to try and match RVV vector/scalar instructions.
4060 if ((LoC >> 31) == HiC)
4061 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4062
4063 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4064 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4065 // vlmax vsetvli or vsetivli to change the VL.
4066 // FIXME: Support larger constants?
4067 // FIXME: Support non-constant VLs by saturating?
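// For example, splatting the i64 constant 0x1234567812345678 at VL=2 can be
// done as a SEW=32 vmv.v.x of 0x12345678 at VL=4, hence the doubling of VL
// below.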
4068 if (LoC == HiC) {
4069 SDValue NewVL;
4070 if (isAllOnesConstant(VL) ||
4071 (isa<RegisterSDNode>(VL) &&
4072 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4073 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4074 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4075 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4076
4077 if (NewVL) {
4078 MVT InterVT =
4079 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4080 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4081 DAG.getUNDEF(InterVT), Lo, NewVL);
4082 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4083 }
4084 }
4085 }
4086
4087 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4088 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4089 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4090 Hi.getConstantOperandVal(1) == 31)
4091 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4092
4093 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4094 // even if it might be sign extended.
4095 if (Hi.isUndef())
4096 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4097
4098 // Fall back to a stack store and stride x0 vector load.
4099 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4100 Hi, VL);
4101}
4102
4103// Called by type legalization to handle splat of i64 on RV32.
4104// FIXME: We can optimize this when the type has sign or zero bits in one
4105// of the halves.
4106static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4107 SDValue Scalar, SDValue VL,
4108 SelectionDAG &DAG) {
4109 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4110 SDValue Lo, Hi;
4111 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4112 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4113}
4114
4115// This function lowers a splat of a scalar operand Scalar with the vector
4116// length VL. It ensures the final sequence is type legal, which is useful when
4117// lowering a splat after type legalization.
4118static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4119 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4120 const RISCVSubtarget &Subtarget) {
4121 bool HasPassthru = Passthru && !Passthru.isUndef();
4122 if (!HasPassthru && !Passthru)
4123 Passthru = DAG.getUNDEF(VT);
4124 if (VT.isFloatingPoint())
4125 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4126
4127 MVT XLenVT = Subtarget.getXLenVT();
4128
4129 // Simplest case is that the operand needs to be promoted to XLenVT.
4130 if (Scalar.getValueType().bitsLE(XLenVT)) {
4131 // If the operand is a constant, sign extend to increase our chances
4132 // of being able to use a .vi instruction. ANY_EXTEND would become a
4133 // zero extend and the simm5 check in isel would fail.
4134 // FIXME: Should we ignore the upper bits in isel instead?
4135 unsigned ExtOpc =
4136 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4137 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4138 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4139 }
4140
4141 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4142 "Unexpected scalar for splat lowering!");
4143
4144 if (isOneConstant(VL) && isNullConstant(Scalar))
4145 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4146 DAG.getConstant(0, DL, XLenVT), VL);
4147
4148 // Otherwise use the more complicated splatting algorithm.
4149 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4150}
4151
4152// This function lowers an insert of a scalar operand Scalar into lane
4153// 0 of the vector regardless of the value of VL. The contents of the
4154// remaining lanes of the result vector are unspecified. VL is assumed
4155// to be non-zero.
4156static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4157 const SDLoc &DL, SelectionDAG &DAG,
4158 const RISCVSubtarget &Subtarget) {
4159 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4160
4161 const MVT XLenVT = Subtarget.getXLenVT();
4162 SDValue Passthru = DAG.getUNDEF(VT);
4163
4164 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4165 isNullConstant(Scalar.getOperand(1))) {
4166 SDValue ExtractedVal = Scalar.getOperand(0);
4167 // The element types must be the same.
4168 if (ExtractedVal.getValueType().getVectorElementType() ==
4169 VT.getVectorElementType()) {
4170 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4171 MVT ExtractedContainerVT = ExtractedVT;
4172 if (ExtractedContainerVT.isFixedLengthVector()) {
4173 ExtractedContainerVT = getContainerForFixedLengthVector(
4174 DAG, ExtractedContainerVT, Subtarget);
4175 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4176 ExtractedVal, DAG, Subtarget);
4177 }
4178 if (ExtractedContainerVT.bitsLE(VT))
4179 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4180 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4181 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4182 DAG.getVectorIdxConstant(0, DL));
4183 }
4184 }
4185
4186
4187 if (VT.isFloatingPoint())
4188 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4189 DAG.getUNDEF(VT), Scalar, VL);
4190
4191 // Avoid the tricky legalization cases by falling back to using the
4192 // splat code which already handles it gracefully.
4193 if (!Scalar.getValueType().bitsLE(XLenVT))
4194 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4195 DAG.getConstant(1, DL, XLenVT),
4196 VT, DL, DAG, Subtarget);
4197
4198 // If the operand is a constant, sign extend to increase our chances
4199 // of being able to use a .vi instruction. ANY_EXTEND would become a
4200 // zero extend and the simm5 check in isel would fail.
4201 // FIXME: Should we ignore the upper bits in isel instead?
4202 unsigned ExtOpc =
4203 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4204 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4205 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4206 DAG.getUNDEF(VT), Scalar, VL);
4207}
4208
4209// Is this a shuffle that extracts either the even or odd elements of a vector?
4210// That is, specifically, either (a) or (b) below.
4211// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4212// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4213// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4214// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4215// Returns {Src Vector, Even Elements} on success.
4216static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4217 SDValue V2, ArrayRef<int> Mask,
4218 const RISCVSubtarget &Subtarget) {
4219 // Need to be able to widen the vector.
4220 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4221 return false;
4222
4223 // Both inputs must be extracts.
4224 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4225 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4226 return false;
4227
4228 // Extracting from the same source.
4229 SDValue Src = V1.getOperand(0);
4230 if (Src != V2.getOperand(0))
4231 return false;
4232
4233 // Src needs to have twice the number of elements.
4234 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4235 return false;
4236
4237 // The extracts must extract the two halves of the source.
4238 if (V1.getConstantOperandVal(1) != 0 ||
4239 V2.getConstantOperandVal(1) != Mask.size())
4240 return false;
4241
4242 // First index must be the first even or odd element from V1.
4243 if (Mask[0] != 0 && Mask[0] != 1)
4244 return false;
4245
4246 // The others must increase by 2 each time.
4247 // TODO: Support undef elements?
4248 for (unsigned i = 1; i != Mask.size(); ++i)
4249 if (Mask[i] != Mask[i - 1] + 2)
4250 return false;
4251
4252 return true;
4253}
4254
4255/// Is this shuffle interleaving contiguous elements from one vector into the
4256/// even elements and contiguous elements from another vector into the odd
4257/// elements? \p EvenSrc will contain the index of the element that should be in
4258/// the first even element. \p OddSrc will contain the index of the element that
4259/// should be in the first odd element. These can be the first element in a source or the element half
4260/// way through the source.
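/// For example, for a v4i8 result the mask <0,4,1,5> interleaves the low halves
/// of the two sources (EvenSrc = 0, OddSrc = 4), while <0,2,1,3> is a unary
/// interleave of the low and high halves of the first source (EvenSrc = 0,
/// OddSrc = 2).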
4261static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4262 int &OddSrc, const RISCVSubtarget &Subtarget) {
4263 // We need to be able to widen elements to the next larger integer type.
4264 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4265 return false;
4266
4267 int Size = Mask.size();
4268 int NumElts = VT.getVectorNumElements();
4269 assert(Size == (int)NumElts && "Unexpected mask size");
4270
4271 SmallVector<unsigned, 2> StartIndexes;
4272 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4273 return false;
4274
4275 EvenSrc = StartIndexes[0];
4276 OddSrc = StartIndexes[1];
4277
4278 // One source should be low half of first vector.
4279 if (EvenSrc != 0 && OddSrc != 0)
4280 return false;
4281
4282 // Subvectors will be extracted from either the start of the two input
4283 // vectors, or from the start and middle of the first vector if it's a unary
4284 // interleave.
4285 // In both cases, HalfNumElts will be extracted.
4286 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4287 // we'll create an illegal extract_subvector.
4288 // FIXME: We could support other values using a slidedown first.
4289 int HalfNumElts = NumElts / 2;
4290 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4291}
4292
4293/// Match shuffles that concatenate two vectors, rotate the concatenation,
4294/// and then extract the original number of elements from the rotated result.
4295/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4296/// returned rotation amount is for a rotate right, where elements move from
4297/// higher elements to lower elements. \p LoSrc indicates the first source
4298/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4299/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4300/// 0 or 1 if a rotation is found.
4301///
4302/// NOTE: We talk about rotate to the right which matches how bit shift and
4303/// rotate instructions are described where LSBs are on the right, but LLVM IR
4304/// and the table below write vectors with the lowest elements on the left.
4305static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4306 int Size = Mask.size();
4307
4308 // We need to detect various ways of spelling a rotation:
4309 // [11, 12, 13, 14, 15, 0, 1, 2]
4310 // [-1, 12, 13, 14, -1, -1, 1, -1]
4311 // [-1, -1, -1, -1, -1, -1, 1, 2]
4312 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4313 // [-1, 4, 5, 6, -1, -1, 9, -1]
4314 // [-1, 4, 5, 6, -1, -1, -1, -1]
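// For example, with Size == 8 the mask [3, 4, 5, 6, 7, 8, 9, 10] is a rotate
// right by 3 of the concatenation: we return 3 with HiSrc = 0 (the first
// vector supplies elements 3..7) and LoSrc = 1 (the second vector supplies
// the tail).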
4315 int Rotation = 0;
4316 LoSrc = -1;
4317 HiSrc = -1;
4318 for (int i = 0; i != Size; ++i) {
4319 int M = Mask[i];
4320 if (M < 0)
4321 continue;
4322
4323 // Determine where a rotate vector would have started.
4324 int StartIdx = i - (M % Size);
4325 // The identity rotation isn't interesting, stop.
4326 if (StartIdx == 0)
4327 return -1;
4328
4329 // If we found the tail of a vector the rotation must be the missing
4330 // front. If we found the head of a vector, it must be how much of the
4331 // head.
4332 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4333
4334 if (Rotation == 0)
4335 Rotation = CandidateRotation;
4336 else if (Rotation != CandidateRotation)
4337 // The rotations don't match, so we can't match this mask.
4338 return -1;
4339
4340 // Compute which value this mask is pointing at.
4341 int MaskSrc = M < Size ? 0 : 1;
4342
4343 // Compute which of the two target values this index should be assigned to.
4344 // This reflects whether the high elements are remaining or the low elements
4345 // are remaining.
4346 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4347
4348 // Either set up this value if we've not encountered it before, or check
4349 // that it remains consistent.
4350 if (TargetSrc < 0)
4351 TargetSrc = MaskSrc;
4352 else if (TargetSrc != MaskSrc)
4353 // This may be a rotation, but it pulls from the inputs in some
4354 // unsupported interleaving.
4355 return -1;
4356 }
4357
4358 // Check that we successfully analyzed the mask, and normalize the results.
4359 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4360 assert((LoSrc >= 0 || HiSrc >= 0) &&
4361 "Failed to find a rotated input vector!");
4362
4363 return Rotation;
4364}
4365
4366// Lower a deinterleave shuffle to vnsrl.
4367// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4368// -> [p, q, r, s] (EvenElts == false)
4369// VT is the type of the vector to return, <[vscale x ]n x ty>
4370// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
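// On a little-endian layout the even elements are the low halves of the
// double-width elements, so after widening the element type a vnsrl by 0
// keeps the even elements and a vnsrl by the original element width keeps
// the odd ones.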
4371static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4372 bool EvenElts,
4373 const RISCVSubtarget &Subtarget,
4374 SelectionDAG &DAG) {
4375 // The result is a vector of type <m x n x ty>
4376 MVT ContainerVT = VT;
4377 // Convert fixed vectors to scalable if needed
4378 if (ContainerVT.isFixedLengthVector()) {
4379 assert(Src.getSimpleValueType().isFixedLengthVector());
4380 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4381
4382 // The source is a vector of type <m x n*2 x ty>
4383 MVT SrcContainerVT =
4384 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4385 ContainerVT.getVectorElementCount() * 2);
4386 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4387 }
4388
4389 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4390
4391 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4392 // This also converts FP to int.
4393 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4394 MVT WideSrcContainerVT = MVT::getVectorVT(
4395 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4396 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4397
4398 // The integer version of the container type.
4399 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4400
4401 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4402 // the original element size.
4403 unsigned Shift = EvenElts ? 0 : EltBits;
4404 SDValue SplatShift = DAG.getNode(
4405 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4406 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4407 SDValue Res =
4408 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4409 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4410 // Cast back to FP if needed.
4411 Res = DAG.getBitcast(ContainerVT, Res);
4412
4413 if (VT.isFixedLengthVector())
4414 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4415 return Res;
4416}
4417
4418// Lower the following shuffle to vslidedown.
4419// a)
4420// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4421// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4422// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4423// b)
4424// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4425// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4426// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4427// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4428// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4429// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4430static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4431 SDValue V1, SDValue V2,
4432 ArrayRef<int> Mask,
4433 const RISCVSubtarget &Subtarget,
4434 SelectionDAG &DAG) {
4435 auto findNonEXTRACT_SUBVECTORParent =
4436 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4437 uint64_t Offset = 0;
4438 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4439 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4440 // a scalable vector. But we don't want to match the case.
4441 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4442 Offset += Parent.getConstantOperandVal(1);
4443 Parent = Parent.getOperand(0);
4444 }
4445 return std::make_pair(Parent, Offset);
4446 };
4447
4448 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4449 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4450
4451 // Extracting from the same source.
4452 SDValue Src = V1Src;
4453 if (Src != V2Src)
4454 return SDValue();
4455
4456 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4457 SmallVector<int, 16> NewMask(Mask);
4458 for (size_t i = 0; i != NewMask.size(); ++i) {
4459 if (NewMask[i] == -1)
4460 continue;
4461
4462 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4463 NewMask[i] = NewMask[i] + V1IndexOffset;
4464 } else {
4465 // Minus NewMask.size() is needed. Otherwise, the b case would be
4466 // <5,6,7,12> instead of <5,6,7,8>.
4467 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4468 }
4469 }
4470
4471 // First index must be known and non-zero. It will be used as the slidedown
4472 // amount.
4473 if (NewMask[0] <= 0)
4474 return SDValue();
4475
4477 // NewMask must also be consecutive (each index one more than the previous).
4477 for (unsigned i = 1; i != NewMask.size(); ++i)
4478 if (NewMask[i - 1] + 1 != NewMask[i])
4479 return SDValue();
4480
4481 MVT XLenVT = Subtarget.getXLenVT();
4482 MVT SrcVT = Src.getSimpleValueType();
4483 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4484 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4485 SDValue Slidedown =
4486 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4487 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4488 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4489 return DAG.getNode(
4490 ISD::EXTRACT_SUBVECTOR, DL, VT,
4491 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4492 DAG.getConstant(0, DL, XLenVT));
4493}
4494
4495// Because vslideup leaves the destination elements at the start intact, we can
4496// use it to perform shuffles that insert subvectors:
4497//
4498// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4499// ->
4500// vsetvli zero, 8, e8, mf2, ta, ma
4501// vslideup.vi v8, v9, 4
4502//
4503// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4504// ->
4505// vsetvli zero, 5, e8, mf2, tu, ma
4506// vslideup.vi v8, v9, 2
4507static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4508 SDValue V1, SDValue V2,
4509 ArrayRef<int> Mask,
4510 const RISCVSubtarget &Subtarget,
4511 SelectionDAG &DAG) {
4512 unsigned NumElts = VT.getVectorNumElements();
4513 int NumSubElts, Index;
4514 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4515 Index))
4516 return SDValue();
4517
4518 bool OpsSwapped = Mask[Index] < (int)NumElts;
4519 SDValue InPlace = OpsSwapped ? V2 : V1;
4520 SDValue ToInsert = OpsSwapped ? V1 : V2;
4521
4522 MVT XLenVT = Subtarget.getXLenVT();
4523 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4524 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4525 // We slide up by the index that the subvector is being inserted at, and set
4526 // VL to the index + the number of elements being inserted.
4527 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4528 // If we're adding a suffix to the in place vector, i.e. inserting right
4529 // up to the very end of it, then we don't actually care about the tail.
4530 if (NumSubElts + Index >= (int)NumElts)
4531 Policy |= RISCVII::TAIL_AGNOSTIC;
4532
4533 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4534 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4535 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4536
4537 SDValue Res;
4538 // If we're inserting into the lowest elements, use a tail undisturbed
4539 // vmv.v.v.
4540 if (Index == 0)
4541 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4542 VL);
4543 else
4544 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4545 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4546 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4547}
4548
4549/// Match v(f)slide1up/down idioms. These operations involve sliding
4550/// N-1 elements to make room for an inserted scalar at one end.
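/// For example, with V1 a splat of x and V2 = <a,b,c,d>, the mask <0,4,5,6>
/// yields <x,a,b,c> (vslide1up.vx) and <5,6,7,0> yields <b,c,d,x>
/// (vslide1down.vx).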
4551static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4552 SDValue V1, SDValue V2,
4553 ArrayRef<int> Mask,
4554 const RISCVSubtarget &Subtarget,
4555 SelectionDAG &DAG) {
4556 bool OpsSwapped = false;
4557 if (!isa<BuildVectorSDNode>(V1)) {
4558 if (!isa<BuildVectorSDNode>(V2))
4559 return SDValue();
4560 std::swap(V1, V2);
4561 OpsSwapped = true;
4562 }
4563 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4564 if (!Splat)
4565 return SDValue();
4566
4567 // Return true if the mask could describe a slide of Mask.size() - 1
4568 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4569 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4570 const unsigned S = (Offset > 0) ? 0 : -Offset;
4571 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4572 for (unsigned i = S; i != E; ++i)
4573 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4574 return false;
4575 return true;
4576 };
4577
4578 const unsigned NumElts = VT.getVectorNumElements();
4579 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4580 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4581 return SDValue();
4582
4583 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4584 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4585 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4586 return SDValue();
4587
4588 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4589 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4590 auto OpCode = IsVSlidedown ?
4591 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4592 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4593 if (!VT.isFloatingPoint())
4594 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4595 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4596 DAG.getUNDEF(ContainerVT),
4597 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4598 Splat, TrueMask, VL);
4599 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4600}
4601
4602// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4603// to create an interleaved vector of <[vscale x] n*2 x ty>.
4604// This requires that the size of ty is less than the subtarget's maximum ELEN.
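// For example, interleaving EvenV = <a,b> with OddV = <x,y> produces
// <a,x,b,y>.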
4605static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4606 const SDLoc &DL, SelectionDAG &DAG,
4607 const RISCVSubtarget &Subtarget) {
4608 MVT VecVT = EvenV.getSimpleValueType();
4609 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4610 // Convert fixed vectors to scalable if needed
4611 if (VecContainerVT.isFixedLengthVector()) {
4612 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4613 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4614 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4615 }
4616
4617 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4618
4619 // We're working with a vector of the same size as the resulting
4620 // interleaved vector, but with half the number of elements and
4621 // twice the SEW (Hence the restriction on not using the maximum
4622 // ELEN)
4623 MVT WideVT =
4624 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4625 VecVT.getVectorElementCount());
4626 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4627 if (WideContainerVT.isFixedLengthVector())
4628 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4629
4630 // Bitcast the input vectors to integers in case they are FP
4631 VecContainerVT = VecContainerVT.changeTypeToInteger();
4632 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4633 OddV = DAG.getBitcast(VecContainerVT, OddV);
4634
4635 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4636 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4637
4638 SDValue Interleaved;
4639 if (OddV.isUndef()) {
4640 // If OddV is undef, this is a zero extend.
4641 // FIXME: Not only does this optimize the code, it fixes some correctness
4642 // issues because MIR does not have freeze.
4643 Interleaved =
4644 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4645 } else if (Subtarget.hasStdExtZvbb()) {
4646 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4647 SDValue OffsetVec =
4648 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4649 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4650 OffsetVec, Passthru, Mask, VL);
4651 if (!EvenV.isUndef())
4652 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4653 Interleaved, EvenV, Passthru, Mask, VL);
4654 } else if (EvenV.isUndef()) {
4655 Interleaved =
4656 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4657
4658 SDValue OffsetVec =
4659 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4660 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4661 Interleaved, OffsetVec, Passthru, Mask, VL);
4662 } else {
4663 // FIXME: We should freeze the odd vector here. We already handled the case
4664 // of provably undef/poison above.
4665
4666 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4667 // vwaddu.vv
4668 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4669 OddV, Passthru, Mask, VL);
4670
4671 // Then multiply OddV by (2^(VecVT.getScalarSizeInBits()) - 1), i.e. the all-ones value
4672 SDValue AllOnesVec = DAG.getSplatVector(
4673 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4674 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4675 OddV, AllOnesVec, Passthru, Mask, VL);
4676
4677 // Add the two together so we get
4678 // (OddV * 0xff...ff) + (OddV + EvenV)
4679 // = (OddV * 0x100...00) + EvenV
4680 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4681 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4682 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4683 Interleaved, OddsMul, Passthru, Mask, VL);
4684 }
4685
4686 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4687 MVT ResultContainerVT = MVT::getVectorVT(
4688 VecVT.getVectorElementType(), // Make sure to use original type
4689 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4690 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4691
4692 // Convert back to a fixed vector if needed
4693 MVT ResultVT =
4694 MVT::getVectorVT(VecVT.getVectorElementType(),
4695 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4696 if (ResultVT.isFixedLengthVector())
4697 Interleaved =
4698 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4699
4700 return Interleaved;
4701}
4702
4703// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4704// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4705static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4706 SelectionDAG &DAG,
4707 const RISCVSubtarget &Subtarget) {
4708 SDLoc DL(SVN);
4709 MVT VT = SVN->getSimpleValueType(0);
4710 SDValue V = SVN->getOperand(0);
4711 unsigned NumElts = VT.getVectorNumElements();
4712
4713 assert(VT.getVectorElementType() == MVT::i1);
4714
4715 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4716 SVN->getMask().size()) ||
4717 !SVN->getOperand(1).isUndef())
4718 return SDValue();
4719
4720 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4721 EVT ViaVT = EVT::getVectorVT(
4722 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4723 EVT ViaBitVT =
4724 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4725
4726 // If we don't have zvbb or the larger element type > ELEN, the operation will
4727 // be illegal.
4728 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4729 ViaVT) ||
4730 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4731 return SDValue();
4732
4733 // If the bit vector doesn't fit exactly into the larger element type, we need
4734 // to insert it into the larger vector and then shift up the reversed bits
4735 // afterwards to get rid of the gap introduced.
4736 if (ViaEltSize > NumElts)
4737 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4738 V, DAG.getVectorIdxConstant(0, DL));
4739
4740 SDValue Res =
4741 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4742
4743 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4744 // element type.
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4747 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4748
4749 Res = DAG.getBitcast(ViaBitVT, Res);
4750
4751 if (ViaEltSize > NumElts)
4752 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4753 DAG.getVectorIdxConstant(0, DL));
4754 return Res;
4755}
4756
4757static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4758 SelectionDAG &DAG,
4759 const RISCVSubtarget &Subtarget,
4760 MVT &RotateVT, unsigned &RotateAmt) {
4761 SDLoc DL(SVN);
4762
4763 EVT VT = SVN->getValueType(0);
4764 unsigned NumElts = VT.getVectorNumElements();
4765 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4766 unsigned NumSubElts;
4767 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4768 NumElts, NumSubElts, RotateAmt))
4769 return false;
4770 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4771 NumElts / NumSubElts);
4772
4773 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4774 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4775}
4776
4777// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4778// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4779// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4780static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4781 SelectionDAG &DAG,
4782 const RISCVSubtarget &Subtarget) {
4783 SDLoc DL(SVN);
4784
4785 EVT VT = SVN->getValueType(0);
4786 unsigned RotateAmt;
4787 MVT RotateVT;
4788 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4789 return SDValue();
4790
4791 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4792
4793 SDValue Rotate;
4794 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4795 // so canonicalize to vrev8.
4796 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4797 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4798 else
4799 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4800 DAG.getConstant(RotateAmt, DL, RotateVT));
4801
4802 return DAG.getBitcast(VT, Rotate);
4803}
4804
4805// If compiling with an exactly known VLEN, see if we can split a
4806// shuffle on m2 or larger into a small number of m1 sized shuffles
4807// which write each destination register exactly once.
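// For example, with VLEN=128 an m2 v8i32 shuffle can be done as two v4i32
// shuffles, provided each destination register only reads from a single
// source register.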
4808static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4809 SelectionDAG &DAG,
4810 const RISCVSubtarget &Subtarget) {
4811 SDLoc DL(SVN);
4812 MVT VT = SVN->getSimpleValueType(0);
4813 SDValue V1 = SVN->getOperand(0);
4814 SDValue V2 = SVN->getOperand(1);
4815 ArrayRef<int> Mask = SVN->getMask();
4816 unsigned NumElts = VT.getVectorNumElements();
4817
4818 // If we don't know exact data layout, not much we can do. If this
4819 // is already m1 or smaller, no point in splitting further.
4820 const auto VLen = Subtarget.getRealVLen();
4821 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4822 return SDValue();
4823
4824 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4825 // expansion for.
4826 unsigned RotateAmt;
4827 MVT RotateVT;
4828 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4829 return SDValue();
4830
4831 MVT ElemVT = VT.getVectorElementType();
4832 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4833 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4834
4835 SmallVector<std::pair<int, SmallVector<int>>>
4836 OutMasks(VRegsPerSrc, {-1, {}});
4837
4838 // Check if our mask can be done as a 1-to-1 mapping from source
4839 // to destination registers in the group without needing to
4840 // write each destination more than once.
4841 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4842 int DstVecIdx = DstIdx / ElemsPerVReg;
4843 int DstSubIdx = DstIdx % ElemsPerVReg;
4844 int SrcIdx = Mask[DstIdx];
4845 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4846 continue;
4847 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4848 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4849 if (OutMasks[DstVecIdx].first == -1)
4850 OutMasks[DstVecIdx].first = SrcVecIdx;
4851 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4852 // Note: This case could easily be handled by keeping track of a chain
4853 // of source values and generating two element shuffles below. This is
4854 // less an implementation question, and more a profitability one.
4855 return SDValue();
4856
4857 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4858 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4859 }
4860
4861 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4862 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4863 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4864 assert(M1VT == getLMUL1VT(M1VT));
4865 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4866 SDValue Vec = DAG.getUNDEF(ContainerVT);
4867 // The following semantically builds up a fixed length concat_vector
4868 // of the component shuffle_vectors. We eagerly lower to scalable here
4869 // to avoid DAG combining it back to a large shuffle_vector again.
4870 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4871 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4872 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4873 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4874 if (SrcVecIdx == -1)
4875 continue;
4876 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4877 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4878 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4879 DAG.getVectorIdxConstant(ExtractIdx, DL));
4880 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4881 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4882 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4883 unsigned InsertIdx = DstVecIdx * NumOpElts;
4884 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4885 DAG.getVectorIdxConstant(InsertIdx, DL));
4886 }
4887 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4888}
4889
4890static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4891 const RISCVSubtarget &Subtarget) {
4892 SDValue V1 = Op.getOperand(0);
4893 SDValue V2 = Op.getOperand(1);
4894 SDLoc DL(Op);
4895 MVT XLenVT = Subtarget.getXLenVT();
4896 MVT VT = Op.getSimpleValueType();
4897 unsigned NumElts = VT.getVectorNumElements();
4898 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4899
4900 if (VT.getVectorElementType() == MVT::i1) {
4901 // Lower to a vror.vi of a larger element type if possible before we promote
4902 // i1s to i8s.
4903 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4904 return V;
4905 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4906 return V;
4907
4908 // Promote i1 shuffle to i8 shuffle.
4909 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4910 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4911 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4912 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4913 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4914 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4915 ISD::SETNE);
4916 }
4917
4918 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4919
4920 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4921
4922 if (SVN->isSplat()) {
4923 const int Lane = SVN->getSplatIndex();
4924 if (Lane >= 0) {
4925 MVT SVT = VT.getVectorElementType();
4926
4927 // Turn splatted vector load into a strided load with an X0 stride.
4928 SDValue V = V1;
4929 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4930 // with undef.
4931 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4932 int Offset = Lane;
4933 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4934 int OpElements =
4935 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4936 V = V.getOperand(Offset / OpElements);
4937 Offset %= OpElements;
4938 }
4939
4940 // We need to ensure the load isn't atomic or volatile.
4941 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4942 auto *Ld = cast<LoadSDNode>(V);
4943 Offset *= SVT.getStoreSize();
4944 SDValue NewAddr = DAG.getMemBasePlusOffset(
4945 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4946
4947 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4948 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4949 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4950 SDValue IntID =
4951 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4952 SDValue Ops[] = {Ld->getChain(),
4953 IntID,
4954 DAG.getUNDEF(ContainerVT),
4955 NewAddr,
4956 DAG.getRegister(RISCV::X0, XLenVT),
4957 VL};
4958 SDValue NewLoad = DAG.getMemIntrinsicNode(
4959 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4960 DAG.getMachineFunction().getMachineMemOperand(
4961 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4962 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4963 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4964 }
4965
4966 // Otherwise use a scalar load and splat. This will give the best
4967 // opportunity to fold a splat into the operation. ISel can turn it into
4968 // the x0 strided load if we aren't able to fold away the select.
4969 if (SVT.isFloatingPoint())
4970 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4971 Ld->getPointerInfo().getWithOffset(Offset),
4972 Ld->getOriginalAlign(),
4973 Ld->getMemOperand()->getFlags());
4974 else
4975 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4976 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4977 Ld->getOriginalAlign(),
4978 Ld->getMemOperand()->getFlags());
4979 DAG.makeEquivalentMemoryOrdering(Ld, V);
4980
4981 unsigned Opc =
4982 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4983 SDValue Splat =
4984 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4985 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4986 }
4987
4988 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4989 assert(Lane < (int)NumElts && "Unexpected lane!");
4990 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4991 V1, DAG.getConstant(Lane, DL, XLenVT),
4992 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4993 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4994 }
4995 }
4996
4997 // For exact VLEN m2 or greater, try to split to m1 operations if we
4998 // can split cleanly.
4999 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5000 return V;
5001
5002 ArrayRef<int> Mask = SVN->getMask();
5003
5004 if (SDValue V =
5005 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5006 return V;
5007
5008 if (SDValue V =
5009 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5010 return V;
5011
5012 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5013 // available.
5014 if (Subtarget.hasStdExtZvkb())
5015 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5016 return V;
5017
5018 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5019 // be undef which can be handled with a single SLIDEDOWN/UP.
5020 int LoSrc, HiSrc;
5021 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5022 if (Rotation > 0) {
5023 SDValue LoV, HiV;
5024 if (LoSrc >= 0) {
5025 LoV = LoSrc == 0 ? V1 : V2;
5026 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5027 }
5028 if (HiSrc >= 0) {
5029 HiV = HiSrc == 0 ? V1 : V2;
5030 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5031 }
5032
5033 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5034 // to slide LoV up by (NumElts - Rotation).
5035 unsigned InvRotate = NumElts - Rotation;
5036
5037 SDValue Res = DAG.getUNDEF(ContainerVT);
5038 if (HiV) {
5039 // Even though we could use a smaller VL, don't, so as to avoid a vsetivli
5040 // toggle.
5041 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5042 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5043 }
5044 if (LoV)
5045 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5046 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5047 RISCVII::TAIL_AGNOSTIC);
5048
5049 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5050 }
5051
5052 // If this is a deinterleave and we can widen the vector, then we can use
5053 // vnsrl to deinterleave.
5054 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5055 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5056 Subtarget, DAG);
5057 }
5058
5059 if (SDValue V =
5060 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5061 return V;
5062
5063 // Detect an interleave shuffle and lower to
5064 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5065 int EvenSrc, OddSrc;
5066 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5067 // Extract the halves of the vectors.
5068 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5069
5070 int Size = Mask.size();
5071 SDValue EvenV, OddV;
5072 assert(EvenSrc >= 0 && "Undef source?");
5073 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5074 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5075 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5076
5077 assert(OddSrc >= 0 && "Undef source?");
5078 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5079 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5080 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5081
5082 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5083 }
5084
5085
5086 // Handle any remaining single source shuffles
5087 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5088 if (V2.isUndef()) {
5089 // We might be able to express the shuffle as a bitrotate. But even if we
5090 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5091 // shifts and a vor will have a higher throughput than a vrgather.
5092 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5093 return V;
5094
5095 if (VT.getScalarSizeInBits() == 8 &&
5096 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5097 // On such a vector we're unable to use i8 as the index type.
5098 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5099 // may involve vector splitting if we're already at LMUL=8, or our
5100 // user-supplied maximum fixed-length LMUL.
5101 return SDValue();
5102 }
5103
5104 // Base case for the two operand recursion below - handle the worst case
5105 // single source shuffle.
5106 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5107 MVT IndexVT = VT.changeTypeToInteger();
5108 // Since we can't introduce illegal index types at this stage, use i16 and
5109 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5110 // than XLenVT.
5111 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5112 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5113 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5114 }
5115
5116 // If the mask allows, we can do all the index computation in 16 bits. This
5117 // requires less work and less register pressure at high LMUL, and creates
5118 // smaller constants which may be cheaper to materialize.
5119 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5120 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5121 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5122 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5123 }
5124
5125 MVT IndexContainerVT =
5126 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5127
5128 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5129 SmallVector<SDValue> GatherIndicesLHS;
5130 for (int MaskIndex : Mask) {
5131 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5132 GatherIndicesLHS.push_back(IsLHSIndex
5133 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5134 : DAG.getUNDEF(XLenVT));
5135 }
5136 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5137 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5138 Subtarget);
5139 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5140 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5141 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5142 }
5143
5144 // By default we preserve the original operand order, and use a mask to
5145 // select LHS as true and RHS as false. However, since RVV vector selects may
5146 // feature splats but only on the LHS, we may choose to invert our mask and
5147 // instead select between RHS and LHS.
5148 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5149
5150 // Detect shuffles which can be re-expressed as vector selects; these are
5151 // shuffles in which each element in the destination is taken from an element
5152 // at the corresponding index in either source vectors.
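// For example, for two v4i8 sources the mask <0,5,2,7> takes lanes 0 and 2
// from the first source and lanes 1 and 3 from the second, i.e. a vselect
// with the mask <1,0,1,0>.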
5153 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5154 int MaskIndex = MaskIdx.value();
5155 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5156 });
5157 if (IsSelect) {
5158 // Now construct the mask that will be used by the vselect operation.
5159 SmallVector<SDValue> MaskVals;
5160 for (int MaskIndex : Mask) {
5161 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5162 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5163 }
5164
5165 if (SwapOps)
5166 std::swap(V1, V2);
5167
5168 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5169 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5170 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5171 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5172 }
5173
5174 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5175 // merged with a second vrgather.
5176 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5177 SmallVector<SDValue> MaskVals;
5178
5179 // Now construct the mask that will be used by the blended vrgather operation.
5180 // Construct the appropriate indices into each vector.
5181 for (int MaskIndex : Mask) {
5182 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5183 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5184 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5185 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5186 ? MaskIndex : -1);
5187 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5188 }
5189
5190 if (SwapOps) {
5191 std::swap(V1, V2);
5192 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5193 }
5194
5195 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5196 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5197 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5198
5199 // Recursively invoke lowering for each operand if we had two
5200 // independent single-source shuffles, and then combine the results via a
5201 // vselect. Note that the vselect will likely be folded back into the
5202 // second permute (vrgather, or other) by the post-isel combine. (A scalar sketch of this decomposition follows the function.)
5203 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5204 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5205 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5206}
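// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// A scalar model of the two-source decomposition above: gather each lane from
// its own source with a single-source mask, then merge the two gathers with a
// per-lane select. The function name and use of std::array are illustrative
// only, and undef (-1) lanes are left out for brevity.
#include <array>
#include <cstddef>

template <std::size_t N>
std::array<int, N> shuffleTwoSources(const std::array<int, N> &V1,
                                     const std::array<int, N> &V2,
                                     const std::array<int, N> &Mask) {
  std::array<int, N> GatherLHS{}, GatherRHS{}, Result{};
  for (std::size_t I = 0; I != N; ++I) {
    bool FromLHS = Mask[I] < static_cast<int>(N);      // index into V1 or V2?
    GatherLHS[I] = FromLHS ? V1[Mask[I]] : 0;          // single-source shuffle of V1
    GatherRHS[I] = FromLHS ? 0 : V2[Mask[I] - static_cast<int>(N)]; // of V2
    Result[I] = FromLHS ? GatherLHS[I] : GatherRHS[I]; // the final vselect
  }
  return Result;
}
// ---------------------------------------------------------------------------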
5207
5208 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5209 // Support splats for any type. These should type legalize well.
5210 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5211 return true;
5212
5213 // Only support legal VTs for other shuffles for now.
5214 if (!isTypeLegal(VT))
5215 return false;
5216
5217 MVT SVT = VT.getSimpleVT();
5218
5219 // Not for i1 vectors.
5220 if (SVT.getScalarType() == MVT::i1)
5221 return false;
5222
5223 int Dummy1, Dummy2;
5224 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5225 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5226}
5227
5228// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5229// the exponent.
5230SDValue
5231RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5232 SelectionDAG &DAG) const {
5233 MVT VT = Op.getSimpleValueType();
5234 unsigned EltSize = VT.getScalarSizeInBits();
5235 SDValue Src = Op.getOperand(0);
5236 SDLoc DL(Op);
5237 MVT ContainerVT = VT;
5238
5239 SDValue Mask, VL;
5240 if (Op->isVPOpcode()) {
5241 Mask = Op.getOperand(1);
5242 if (VT.isFixedLengthVector())
5243 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5244 Subtarget);
5245 VL = Op.getOperand(2);
5246 }
5247
5248 // We choose an FP type that can represent the value if possible. Otherwise, we
5249 // use a round-towards-zero conversion so that the exponent of the result is still correct.
5250 // TODO: Use f16 for i8 when possible?
5251 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5252 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5253 FloatEltVT = MVT::f32;
5254 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5255
5256 // Legal types should have been checked in the RISCVTargetLowering
5257 // constructor.
5258 // TODO: Splitting may make sense in some cases.
5259 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5260 "Expected legal float type!");
5261
5262 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5263 // The trailing zero count is equal to log2 of this single bit value.
5264 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5265 SDValue Neg = DAG.getNegative(Src, DL, VT);
5266 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5267 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5268 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5269 Src, Mask, VL);
5270 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5271 }
5272
5273 // We have a legal FP type, convert to it.
5274 SDValue FloatVal;
5275 if (FloatVT.bitsGT(VT)) {
5276 if (Op->isVPOpcode())
5277 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5278 else
5279 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5280 } else {
5281 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5282 if (VT.isFixedLengthVector()) {
5283 ContainerVT = getContainerForFixedLengthVector(VT);
5284 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5285 }
5286 if (!Op->isVPOpcode())
5287 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5288 SDValue RTZRM =
5289 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5290 MVT ContainerFloatVT =
5291 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5292 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5293 Src, Mask, RTZRM, VL);
5294 if (VT.isFixedLengthVector())
5295 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5296 }
5297 // Bitcast to integer and shift the exponent to the LSB.
5298 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5299 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5300 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5301
5302 SDValue Exp;
5303 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5304 if (Op->isVPOpcode()) {
5305 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5306 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5307 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5308 } else {
5309 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5310 DAG.getConstant(ShiftAmt, DL, IntVT));
5311 if (IntVT.bitsLT(VT))
5312 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5313 else if (IntVT.bitsGT(VT))
5314 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5315 }
5316
5317 // The exponent contains log2 of the value in biased form.
5318 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5319 // For trailing zeros, we just need to subtract the bias.
5320 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5321 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5322 DAG.getConstant(ExponentBias, DL, VT));
5323 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5324 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5325 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5326
5327 // For leading zeros, we need to remove the bias and convert from log2 to
5328 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5329 unsigned Adjust = ExponentBias + (EltSize - 1);
5330 SDValue Res;
5331 if (Op->isVPOpcode())
5332 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5333 Mask, VL);
5334 else
5335 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5336
5337 // With a zero input, the result above equals Adjust, which is greater than
5338 // EltSize. Hence, we can take min(Res, EltSize) for CTLZ.
5339 if (Op.getOpcode() == ISD::CTLZ)
5340 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5341 else if (Op.getOpcode() == ISD::VP_CTLZ)
5342 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5343 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5344 return Res;
5345}
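// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The same exponent-extraction trick on a scalar uint32_t, assuming IEEE-754
// binary64 'double'. Function names are illustrative, not LLVM APIs.
#include <cstdint>
#include <cstring>

unsigned cttzViaFP(uint32_t X) {          // X != 0, like CTTZ_ZERO_UNDEF
  uint32_t LowBit = X & -X;               // isolate the lowest set bit
  double D = static_cast<double>(LowBit); // exact: a power of two
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));   // bitcast to integer
  unsigned Exp = (Bits >> 52) & 0x7ff;    // shift the exponent to the LSB
  return Exp - 1023;                      // remove the bias -> log2(LowBit)
}

unsigned ctlzViaFP(uint32_t X) {          // X may be 0, like CTLZ
  double D = static_cast<double>(X);      // uint32_t fits exactly in binary64
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  unsigned Exp = (Bits >> 52) & 0x7ff;
  unsigned Res = (1023 + 31) - Exp;       // Adjust - Exp, as in the code above
  return Res > 32 ? 32 : Res;             // zero input: clamp with a min
}
// ---------------------------------------------------------------------------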
5346
5347SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5348 SelectionDAG &DAG) const {
5349 SDLoc DL(Op);
5350 MVT XLenVT = Subtarget.getXLenVT();
5351 SDValue Source = Op->getOperand(0);
5352 MVT SrcVT = Source.getSimpleValueType();
5353 SDValue Mask = Op->getOperand(1);
5354 SDValue EVL = Op->getOperand(2);
5355
5356 if (SrcVT.isFixedLengthVector()) {
5357 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5358 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5359 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5360 Subtarget);
5361 SrcVT = ContainerVT;
5362 }
5363
5364 // Convert to boolean vector.
5365 if (SrcVT.getScalarType() != MVT::i1) {
5366 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5367 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5368 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5369 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5370 DAG.getUNDEF(SrcVT), Mask, EVL});
5371 }
5372
5373 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5374 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5375 // In this case, we can interpret poison as -1, so there is nothing further to do.
5376 return Res;
5377
5378 // Convert -1 to VL.
5379 SDValue SetCC =
5380 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5381 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5382 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5383}
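// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The vfirst-style counting used by lowerVPCttzElements above, modeled on a
// plain bool array. The VP mask operand is omitted and the names are
// illustrative.
#include <cstddef>

long cttzElts(const bool *Active, std::size_t EVL, bool MapNotFoundToEVL) {
  for (std::size_t I = 0; I != EVL; ++I)
    if (Active[I])
      return static_cast<long>(I);           // index of the first true element
  // vfirst yields -1 when no element is set; the non-ZERO_UNDEF form then
  // selects EVL instead of -1.
  return MapNotFoundToEVL ? static_cast<long>(EVL) : -1;
}
// ---------------------------------------------------------------------------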
5384
5385// While RVV has alignment restrictions, we should always be able to load as a
5386// legal equivalently-sized byte-typed vector instead. This method is
5387// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5388// the load is already correctly-aligned, it returns SDValue().
5389SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5390 SelectionDAG &DAG) const {
5391 auto *Load = cast<LoadSDNode>(Op);
5392 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5393
5394 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5395 Load->getMemoryVT(),
5396 *Load->getMemOperand()))
5397 return SDValue();
5398
5399 SDLoc DL(Op);
5400 MVT VT = Op.getSimpleValueType();
5401 unsigned EltSizeBits = VT.getScalarSizeInBits();
5402 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5403 "Unexpected unaligned RVV load type");
5404 MVT NewVT =
5405 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5406 assert(NewVT.isValid() &&
5407 "Expecting equally-sized RVV vector types to be legal");
5408 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5409 Load->getPointerInfo(), Load->getOriginalAlign(),
5410 Load->getMemOperand()->getFlags());
5411 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5412}
5413
5414// While RVV has alignment restrictions, we should always be able to store as a
5415// legal equivalently-sized byte-typed vector instead. This method is
5416// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5417// returns SDValue() if the store is already correctly aligned.
5418SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5419 SelectionDAG &DAG) const {
5420 auto *Store = cast<StoreSDNode>(Op);
5421 assert(Store && Store->getValue().getValueType().isVector() &&
5422 "Expected vector store");
5423
5424 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5425 Store->getMemoryVT(),
5426 *Store->getMemOperand()))
5427 return SDValue();
5428
5429 SDLoc DL(Op);
5430 SDValue StoredVal = Store->getValue();
5431 MVT VT = StoredVal.getSimpleValueType();
5432 unsigned EltSizeBits = VT.getScalarSizeInBits();
5433 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5434 "Unexpected unaligned RVV store type");
5435 MVT NewVT =
5436 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5437 assert(NewVT.isValid() &&
5438 "Expecting equally-sized RVV vector types to be legal");
5439 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5440 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5441 Store->getPointerInfo(), Store->getOriginalAlign(),
5442 Store->getMemOperand()->getFlags());
5443}
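// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The "retype to an equally sized byte vector" idea used by the two functions
// above, modeled with memcpy for a hypothetical unaligned 4 x i32 payload.
#include <array>
#include <cstdint>
#include <cstring>

std::array<uint32_t, 4> loadUnaligned4xI32(const void *P) {
  // 4 elements * (32 / 8) bytes each = 16 byte-sized "lanes".
  std::array<uint8_t, 16> Bytes;
  std::memcpy(Bytes.data(), P, Bytes.size());             // byte-typed access,
  std::array<uint32_t, 4> Lanes;                          // no alignment needed
  std::memcpy(Lanes.data(), Bytes.data(), sizeof(Lanes)); // "bitcast" back
  return Lanes;
}
// ---------------------------------------------------------------------------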
5444
5445 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5446 const RISCVSubtarget &Subtarget) {
5447 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5448
5449 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5450
5451 // All simm32 constants should be handled by isel.
5452 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5453 // this check redundant, but small immediates are common so this check
5454 // should have better compile time.
5455 if (isInt<32>(Imm))
5456 return Op;
5457
5458 // We only need to cost the immediate, if constant pool lowering is enabled.
5459 if (!Subtarget.useConstantPoolForLargeInts())
5460 return Op;
5461
5462 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5463 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5464 return Op;
5465
5466 // Optimizations below are disabled for opt size. If we're optimizing for
5467 // size, use a constant pool.
5468 if (DAG.shouldOptForSize())
5469 return SDValue();
5470
5471 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5472 // do that if it will avoid a constant pool. It will require an extra
5473 // temporary register though. (An arithmetic sketch of this follows the function.)
5474 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5475 // low and high 32 bits are the same and bit 31 and 63 are set.
5476 unsigned ShiftAmt, AddOpc;
5477 RISCVMatInt::InstSeq SeqLo =
5478 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5479 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5480 return Op;
5481
5482 return SDValue();
5483}
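// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The (ADD (SLLI X, C), X) special case above, worked out on a hypothetical
// constant whose low and high 32-bit halves repeat.
#include <cstdint>

constexpr uint64_t buildViaShiftAdd(uint64_t Lo, unsigned ShiftAmt) {
  return (Lo << ShiftAmt) + Lo; // one slli + one add after materializing Lo
}
static_assert(buildViaShiftAdd(0x12345, 32) == 0x0001234500012345ULL,
              "repeated halves need no constant-pool load");
// ---------------------------------------------------------------------------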
5484
5485 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5486 const RISCVSubtarget &Subtarget) {
5487 SDLoc dl(Op);
5488 AtomicOrdering FenceOrdering =
5489 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5490 SyncScope::ID FenceSSID =
5491 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5492
5493 if (Subtarget.hasStdExtZtso()) {
5494 // The only fence that needs an instruction is a sequentially-consistent
5495 // cross-thread fence.
5496 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5497 FenceSSID == SyncScope::System)
5498 return Op;
5499
5500 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5501 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5502 }
5503
5504 // singlethread fences only synchronize with signal handlers on the same
5505 // thread and thus only need to preserve instruction order, not actually
5506 // enforce memory ordering.
5507 if (FenceSSID == SyncScope::SingleThread)
5508 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5509 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5510
5511 return Op;
5512}
5513
5514 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5515 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5516 "Unexpected custom legalisation");
5517
5518 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5519 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5520 SDLoc DL(Op);
5521 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5522 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5523 SDValue Result =
5524 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5525
5526 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5527 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5528 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5529 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5530 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5531 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5532 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5533}
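// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The widen-then-clamp scheme used by the i32 saturating add/sub lowering
// above, expressed on scalars. The function name is illustrative.
#include <algorithm>
#include <cstdint>

int32_t saddsat32(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) + int64_t(B);     // sign-extend and add in 64 bits
  Wide = std::min<int64_t>(Wide, INT32_MAX);  // SMIN against SatMax
  Wide = std::max<int64_t>(Wide, INT32_MIN);  // SMAX against SatMin
  return static_cast<int32_t>(Wide);          // truncate back to i32
}
// ---------------------------------------------------------------------------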
5534
5535 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5536 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5537 "Unexpected custom legalisation");
5538
5539 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5540 // sign extend allows overflow of the lower 32 bits to be detected on
5541 // the promoted size.
5542 SDLoc DL(Op);
5543 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5544 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5545 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5546 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5547}
5548
5549// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5550 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5551 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5552 "Unexpected custom legalisation");
5553 if (isa<ConstantSDNode>(Op.getOperand(1)))
5554 return SDValue();
5555
5556 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5557 SDLoc DL(Op);
5558 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5559 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5560 SDValue WideOp =
5561 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5562 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5563 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5564 DAG.getValueType(MVT::i32));
5565 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5566 ISD::SETNE);
5567 return DAG.getMergeValues({Res, Ovf}, DL);
5568}
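// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The overflow test used by the i32 SADDO/SSUBO lowering above: the widened
// result overflows i32 exactly when it differs from its own low 32 bits
// sign-extended back to 64 bits. Names are illustrative.
#include <cstdint>
#include <utility>

std::pair<int32_t, bool> saddo32(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) + int64_t(B);                          // ADD on i64
  int64_t SExt = static_cast<int64_t>(static_cast<int32_t>(Wide)); // SIGN_EXTEND_INREG
  return {static_cast<int32_t>(Wide), Wide != SExt};               // SETNE
}
// ---------------------------------------------------------------------------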
5569
5570// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5571 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5572 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5573 "Unexpected custom legalisation");
5574 SDLoc DL(Op);
5575 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5576 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5577 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5578 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5579 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5580 DAG.getValueType(MVT::i32));
5581 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5582 ISD::SETNE);
5583 return DAG.getMergeValues({Res, Ovf}, DL);
5584}
5585
5586SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5587 SelectionDAG &DAG) const {
5588 SDLoc DL(Op);
5589 MVT VT = Op.getSimpleValueType();
5590 MVT XLenVT = Subtarget.getXLenVT();
5591 unsigned Check = Op.getConstantOperandVal(1);
5592 unsigned TDCMask = 0;
5593 if (Check & fcSNan)
5594 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5595 if (Check & fcQNan)
5596 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5597 if (Check & fcPosInf)
5598 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5599 if (Check & fcNegInf)
5600 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5601 if (Check & fcPosNormal)
5602 TDCMask |= RISCV::FPMASK_Positive_Normal;
5603 if (Check & fcNegNormal)
5604 TDCMask |= RISCV::FPMASK_Negative_Normal;
5605 if (Check & fcPosSubnormal)
5606 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5607 if (Check & fcNegSubnormal)
5608 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5609 if (Check & fcPosZero)
5610 TDCMask |= RISCV::FPMASK_Positive_Zero;
5611 if (Check & fcNegZero)
5612 TDCMask |= RISCV::FPMASK_Negative_Zero;
5613
5614 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5615
5616 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5617
5618 if (VT.isVector()) {
5619 SDValue Op0 = Op.getOperand(0);
5620 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5621
5622 if (VT.isScalableVector()) {
5623 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5624 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5625 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5626 Mask = Op.getOperand(2);
5627 VL = Op.getOperand(3);
5628 }
5629 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5630 VL, Op->getFlags());
5631 if (IsOneBitMask)
5632 return DAG.getSetCC(DL, VT, FPCLASS,
5633 DAG.getConstant(TDCMask, DL, DstVT),
5634 ISD::SETEQ);
5635 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5636 DAG.getConstant(TDCMask, DL, DstVT));
5637 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5638 ISD::SETNE);
5639 }
5640
5641 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5642 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5643 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5644 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5645 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5646 Mask = Op.getOperand(2);
5647 MVT MaskContainerVT =
5648 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5649 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5650 VL = Op.getOperand(3);
5651 }
5652 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5653
5654 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5655 Mask, VL, Op->getFlags());
5656
5657 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5658 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5659 if (IsOneBitMask) {
5660 SDValue VMSEQ =
5661 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5662 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5663 DAG.getUNDEF(ContainerVT), Mask, VL});
5664 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5665 }
5666 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5667 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5668
5669 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5670 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5671 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5672
5673 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5674 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5675 DAG.getUNDEF(ContainerVT), Mask, VL});
5676 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5677 }
5678
5679 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5680 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5681 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5682 ISD::SETNE);
5683 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5684}
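// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The "classify, then AND against a category mask" test above, modeled on
// scalars with std::fpclassify in place of the fclass instruction. The mask
// values here are hypothetical and are not the RISC-V fclass encodings.
#include <cmath>

enum : unsigned { ClassNan = 1u << 0, ClassInf = 1u << 1, ClassZero = 1u << 2,
                  ClassSubnormal = 1u << 3, ClassNormal = 1u << 4 };

bool isFPClassLike(double X, unsigned WantedMask) {
  unsigned Class = 0;
  switch (std::fpclassify(X)) {
  case FP_NAN:       Class = ClassNan;       break;
  case FP_INFINITE:  Class = ClassInf;       break;
  case FP_ZERO:      Class = ClassZero;      break;
  case FP_SUBNORMAL: Class = ClassSubnormal; break;
  default:           Class = ClassNormal;    break;
  }
  return (Class & WantedMask) != 0; // the AND + SETNE at the end of the lowering
}
// ---------------------------------------------------------------------------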
5685
5686// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5687// operations propagate nans.
5688 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5689 const RISCVSubtarget &Subtarget) {
5690 SDLoc DL(Op);
5691 MVT VT = Op.getSimpleValueType();
5692
5693 SDValue X = Op.getOperand(0);
5694 SDValue Y = Op.getOperand(1);
5695
5696 if (!VT.isVector()) {
5697 MVT XLenVT = Subtarget.getXLenVT();
5698
5699 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5700 // ensures that when one input is a nan, the other will also be a nan,
5701 // allowing the nan to propagate. If both inputs are nan, this will swap the
5702 // inputs, which is harmless. (A scalar sketch of this swap follows the function.)
5703
5704 SDValue NewY = Y;
5705 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5706 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5707 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5708 }
5709
5710 SDValue NewX = X;
5711 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5712 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5713 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5714 }
5715
5716 unsigned Opc =
5717 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5718 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5719 }
5720
5721 // Check for NaNs before converting the fixed-length vectors to scalable ones.
5722 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5723 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5724
5725 MVT ContainerVT = VT;
5726 if (VT.isFixedLengthVector()) {
5727 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5728 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5729 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5730 }
5731
5732 SDValue Mask, VL;
5733 if (Op->isVPOpcode()) {
5734 Mask = Op.getOperand(2);
5735 if (VT.isFixedLengthVector())
5736 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5737 Subtarget);
5738 VL = Op.getOperand(3);
5739 } else {
5740 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5741 }
5742
5743 SDValue NewY = Y;
5744 if (!XIsNeverNan) {
5745 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5746 {X, X, DAG.getCondCode(ISD::SETOEQ),
5747 DAG.getUNDEF(ContainerVT), Mask, VL});
5748 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5749 DAG.getUNDEF(ContainerVT), VL);
5750 }
5751
5752 SDValue NewX = X;
5753 if (!YIsNeverNan) {
5754 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5755 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5756 DAG.getUNDEF(ContainerVT), Mask, VL});
5757 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5758 DAG.getUNDEF(ContainerVT), VL);
5759 }
5760
5761 unsigned Opc =
5762 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5763 ? RISCVISD::VFMAX_VL
5764 : RISCVISD::VFMIN_VL;
5765 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5766 DAG.getUNDEF(ContainerVT), Mask, VL);
5767 if (VT.isFixedLengthVector())
5768 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5769 return Res;
5770}
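// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The NaN-propagation swap from the scalar path above. std::fmax alone would
// return the non-NaN operand, like the fmax instruction; signed-zero ordering
// is ignored here. The function name is illustrative.
#include <cmath>

double fmaximumLike(double X, double Y) {
  double NewY = (X == X) ? Y : X; // X != X detects a NaN X; then Y becomes X
  double NewX = (Y == Y) ? X : Y; // likewise when Y is a NaN
  return std::fmax(NewX, NewY);   // now a NaN input always propagates
}
// ---------------------------------------------------------------------------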
5771
5772/// Get a RISC-V target specified VL op for a given SDNode.
5773static unsigned getRISCVVLOp(SDValue Op) {
5774#define OP_CASE(NODE) \
5775 case ISD::NODE: \
5776 return RISCVISD::NODE##_VL;
5777#define VP_CASE(NODE) \
5778 case ISD::VP_##NODE: \
5779 return RISCVISD::NODE##_VL;
5780 // clang-format off
5781 switch (Op.getOpcode()) {
5782 default:
5783 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5784 OP_CASE(ADD)
5785 OP_CASE(SUB)
5786 OP_CASE(MUL)
5787 OP_CASE(MULHS)
5788 OP_CASE(MULHU)
5789 OP_CASE(SDIV)
5790 OP_CASE(SREM)
5791 OP_CASE(UDIV)
5792 OP_CASE(UREM)
5793 OP_CASE(SHL)
5794 OP_CASE(SRA)
5795 OP_CASE(SRL)
5796 OP_CASE(ROTL)
5797 OP_CASE(ROTR)
5798 OP_CASE(BSWAP)
5799 OP_CASE(CTTZ)
5800 OP_CASE(CTLZ)
5801 OP_CASE(CTPOP)
5802 OP_CASE(BITREVERSE)
5803 OP_CASE(SADDSAT)
5804 OP_CASE(UADDSAT)
5805 OP_CASE(SSUBSAT)
5806 OP_CASE(USUBSAT)
5807 OP_CASE(AVGFLOORU)
5808 OP_CASE(AVGCEILU)
5809 OP_CASE(FADD)
5810 OP_CASE(FSUB)
5811 OP_CASE(FMUL)
5812 OP_CASE(FDIV)
5813 OP_CASE(FNEG)
5814 OP_CASE(FABS)
5815 OP_CASE(FSQRT)
5816 OP_CASE(SMIN)
5817 OP_CASE(SMAX)
5818 OP_CASE(UMIN)
5819 OP_CASE(UMAX)
5820 OP_CASE(STRICT_FADD)
5821 OP_CASE(STRICT_FSUB)
5822 OP_CASE(STRICT_FMUL)
5823 OP_CASE(STRICT_FDIV)
5824 OP_CASE(STRICT_FSQRT)
5825 VP_CASE(ADD) // VP_ADD
5826 VP_CASE(SUB) // VP_SUB
5827 VP_CASE(MUL) // VP_MUL
5828 VP_CASE(SDIV) // VP_SDIV
5829 VP_CASE(SREM) // VP_SREM
5830 VP_CASE(UDIV) // VP_UDIV
5831 VP_CASE(UREM) // VP_UREM
5832 VP_CASE(SHL) // VP_SHL
5833 VP_CASE(FADD) // VP_FADD
5834 VP_CASE(FSUB) // VP_FSUB
5835 VP_CASE(FMUL) // VP_FMUL
5836 VP_CASE(FDIV) // VP_FDIV
5837 VP_CASE(FNEG) // VP_FNEG
5838 VP_CASE(FABS) // VP_FABS
5839 VP_CASE(SMIN) // VP_SMIN
5840 VP_CASE(SMAX) // VP_SMAX
5841 VP_CASE(UMIN) // VP_UMIN
5842 VP_CASE(UMAX) // VP_UMAX
5843 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5844 VP_CASE(SETCC) // VP_SETCC
5845 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5846 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5847 VP_CASE(BITREVERSE) // VP_BITREVERSE
5848 VP_CASE(SADDSAT) // VP_SADDSAT
5849 VP_CASE(UADDSAT) // VP_UADDSAT
5850 VP_CASE(SSUBSAT) // VP_SSUBSAT
5851 VP_CASE(USUBSAT) // VP_USUBSAT
5852 VP_CASE(BSWAP) // VP_BSWAP
5853 VP_CASE(CTLZ) // VP_CTLZ
5854 VP_CASE(CTTZ) // VP_CTTZ
5855 VP_CASE(CTPOP) // VP_CTPOP
5856 case ISD::CTLZ_ZERO_UNDEF:
5857 case ISD::VP_CTLZ_ZERO_UNDEF:
5858 return RISCVISD::CTLZ_VL;
5859 case ISD::CTTZ_ZERO_UNDEF:
5860 case ISD::VP_CTTZ_ZERO_UNDEF:
5861 return RISCVISD::CTTZ_VL;
5862 case ISD::FMA:
5863 case ISD::VP_FMA:
5864 return RISCVISD::VFMADD_VL;
5865 case ISD::STRICT_FMA:
5866 return RISCVISD::STRICT_VFMADD_VL;
5867 case ISD::AND:
5868 case ISD::VP_AND:
5869 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5870 return RISCVISD::VMAND_VL;
5871 return RISCVISD::AND_VL;
5872 case ISD::OR:
5873 case ISD::VP_OR:
5874 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5875 return RISCVISD::VMOR_VL;
5876 return RISCVISD::OR_VL;
5877 case ISD::XOR:
5878 case ISD::VP_XOR:
5879 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5880 return RISCVISD::VMXOR_VL;
5881 return RISCVISD::XOR_VL;
5882 case ISD::VP_SELECT:
5883 case ISD::VP_MERGE:
5884 return RISCVISD::VMERGE_VL;
5885 case ISD::VP_ASHR:
5886 return RISCVISD::SRA_VL;
5887 case ISD::VP_LSHR:
5888 return RISCVISD::SRL_VL;
5889 case ISD::VP_SQRT:
5890 return RISCVISD::FSQRT_VL;
5891 case ISD::VP_SIGN_EXTEND:
5892 return RISCVISD::VSEXT_VL;
5893 case ISD::VP_ZERO_EXTEND:
5894 return RISCVISD::VZEXT_VL;
5895 case ISD::VP_FP_TO_SINT:
5896 return RISCVISD::VFCVT_RTZ_X_F_VL;
5897 case ISD::VP_FP_TO_UINT:
5898 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5899 case ISD::FMINNUM:
5900 case ISD::VP_FMINNUM:
5901 return RISCVISD::VFMIN_VL;
5902 case ISD::FMAXNUM:
5903 case ISD::VP_FMAXNUM:
5904 return RISCVISD::VFMAX_VL;
5905 case ISD::LRINT:
5906 case ISD::VP_LRINT:
5907 case ISD::LLRINT:
5908 case ISD::VP_LLRINT:
5909 return RISCVISD::VFCVT_X_F_VL;
5910 }
5911 // clang-format on
5912#undef OP_CASE
5913#undef VP_CASE
5914}
5915
5916/// Return true if a RISC-V target specified op has a merge operand.
5917static bool hasMergeOp(unsigned Opcode) {
5918 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5919 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5920 "not a RISC-V target specific op");
5921 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5922 126 &&
5923 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5924 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5925 21 &&
5926 "adding target specific op should update this function");
5927 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5928 return true;
5929 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5930 return true;
5931 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5932 return true;
5933 if (Opcode == RISCVISD::SETCC_VL)
5934 return true;
5935 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5936 return true;
5937 if (Opcode == RISCVISD::VMERGE_VL)
5938 return true;
5939 return false;
5940}
5941
5942/// Return true if a RISC-V target specified op has a mask operand.
5943static bool hasMaskOp(unsigned Opcode) {
5944 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5945 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5946 "not a RISC-V target specific op");
5947 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5948 126 &&
5949 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5950 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5951 21 &&
5952 "adding target specific op should update this function");
5953 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5954 return true;
5955 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5956 return true;
5957 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5958 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5959 return true;
5960 return false;
5961}
5962
5963 SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
5964 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5965 SDLoc DL(Op);
5966
5969
5970 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5971 if (!Op.getOperand(j).getValueType().isVector()) {
5972 LoOperands[j] = Op.getOperand(j);
5973 HiOperands[j] = Op.getOperand(j);
5974 continue;
5975 }
5976 std::tie(LoOperands[j], HiOperands[j]) =
5977 DAG.SplitVector(Op.getOperand(j), DL);
5978 }
5979
5980 SDValue LoRes =
5981 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5982 SDValue HiRes =
5983 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5984
5985 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5986}
5987
5989 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5990 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5991 SDLoc DL(Op);
5992
5995
5996 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5997 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5998 std::tie(LoOperands[j], HiOperands[j]) =
5999 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6000 continue;
6001 }
6002 if (!Op.getOperand(j).getValueType().isVector()) {
6003 LoOperands[j] = Op.getOperand(j);
6004 HiOperands[j] = Op.getOperand(j);
6005 continue;
6006 }
6007 std::tie(LoOperands[j], HiOperands[j]) =
6008 DAG.SplitVector(Op.getOperand(j), DL);
6009 }
6010
6011 SDValue LoRes =
6012 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6013 SDValue HiRes =
6014 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6015
6016 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6017}
6018
6019 SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) const {
6020 SDLoc DL(Op);
6021
6022 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6023 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6024 auto [EVLLo, EVLHi] =
6025 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6026
6027 SDValue ResLo =
6028 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6029 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6030 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6031 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6032}
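// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// The chaining used by the reduction-splitting helper above (reached via
// SplitVectorReductionOp): reduce the low half with the original start value,
// then feed that result in as the start value of the high half. Shown with
// std::accumulate; names are illustrative.
#include <numeric>
#include <vector>

int splitReduceAdd(const std::vector<int> &V, int Start) {
  auto Mid = V.begin() + static_cast<long>(V.size() / 2);
  int ResLo = std::accumulate(V.begin(), Mid, Start); // low half, original start
  return std::accumulate(Mid, V.end(), ResLo);        // high half, chained start
}
// ---------------------------------------------------------------------------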
6033
6035
6036 assert(Op->isStrictFPOpcode());
6037
6038 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6039
6040 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6041 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6042
6043 SDLoc DL(Op);
6044
6047
6048 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6049 if (!Op.getOperand(j).getValueType().isVector()) {
6050 LoOperands[j] = Op.getOperand(j);
6051 HiOperands[j] = Op.getOperand(j);
6052 continue;
6053 }
6054 std::tie(LoOperands[j], HiOperands[j]) =
6055 DAG.SplitVector(Op.getOperand(j), DL);
6056 }
6057
6058 SDValue LoRes =
6059 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6060 HiOperands[0] = LoRes.getValue(1);
6061 SDValue HiRes =
6062 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6063
6064 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6065 LoRes.getValue(0), HiRes.getValue(0));
6066 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6067}
6068
6070 SelectionDAG &DAG) const {
6071 switch (Op.getOpcode()) {
6072 default:
6073 report_fatal_error("unimplemented operand");
6074 case ISD::ATOMIC_FENCE:
6075 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6076 case ISD::GlobalAddress:
6077 return lowerGlobalAddress(Op, DAG);
6078 case ISD::BlockAddress:
6079 return lowerBlockAddress(Op, DAG);
6080 case ISD::ConstantPool:
6081 return lowerConstantPool(Op, DAG);
6082 case ISD::JumpTable:
6083 return lowerJumpTable(Op, DAG);
6084 case ISD::GlobalTLSAddress:
6085 return lowerGlobalTLSAddress(Op, DAG);
6086 case ISD::Constant:
6087 return lowerConstant(Op, DAG, Subtarget);
6088 case ISD::SELECT:
6089 return lowerSELECT(Op, DAG);
6090 case ISD::BRCOND:
6091 return lowerBRCOND(Op, DAG);
6092 case ISD::VASTART:
6093 return lowerVASTART(Op, DAG);
6094 case ISD::FRAMEADDR:
6095 return lowerFRAMEADDR(Op, DAG);
6096 case ISD::RETURNADDR:
6097 return lowerRETURNADDR(Op, DAG);
6098 case ISD::SADDO:
6099 case ISD::SSUBO:
6100 return lowerSADDO_SSUBO(Op, DAG);
6101 case ISD::SMULO:
6102 return lowerSMULO(Op, DAG);
6103 case ISD::SHL_PARTS:
6104 return lowerShiftLeftParts(Op, DAG);
6105 case ISD::SRA_PARTS:
6106 return lowerShiftRightParts(Op, DAG, true);
6107 case ISD::SRL_PARTS:
6108 return lowerShiftRightParts(Op, DAG, false);
6109 case ISD::ROTL:
6110 case ISD::ROTR:
6111 if (Op.getValueType().isFixedLengthVector()) {
6112 assert(Subtarget.hasStdExtZvkb());
6113 return lowerToScalableOp(Op, DAG);
6114 }
6115 assert(Subtarget.hasVendorXTHeadBb() &&
6116 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6117 "Unexpected custom legalization");
6118 // XTHeadBb only supports rotate by constant.
6119 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6120 return SDValue();
6121 return Op;
6122 case ISD::BITCAST: {
6123 SDLoc DL(Op);
6124 EVT VT = Op.getValueType();
6125 SDValue Op0 = Op.getOperand(0);
6126 EVT Op0VT = Op0.getValueType();
6127 MVT XLenVT = Subtarget.getXLenVT();
6128 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6129 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6130 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6131 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6132 return FPConv;
6133 }
6134 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6135 Subtarget.hasStdExtZfbfmin()) {
6136 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6137 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6138 return FPConv;
6139 }
6140 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6141 Subtarget.hasStdExtFOrZfinx()) {
6142 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6143 SDValue FPConv =
6144 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6145 return FPConv;
6146 }
6147 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6148 SDValue Lo, Hi;
6149 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6150 SDValue RetReg =
6151 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6152 return RetReg;
6153 }
6154
6155 // Consider other scalar<->scalar casts as legal if the types are legal.
6156 // Otherwise expand them.
6157 if (!VT.isVector() && !Op0VT.isVector()) {
6158 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6159 return Op;
6160 return SDValue();
6161 }
6162
6163 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6164 "Unexpected types");
6165
6166 if (VT.isFixedLengthVector()) {
6167 // We can handle fixed length vector bitcasts with a simple replacement
6168 // in isel.
6169 if (Op0VT.isFixedLengthVector())
6170 return Op;
6171 // When bitcasting from scalar to fixed-length vector, insert the scalar
6172 // into a one-element vector of the result type, and perform a vector
6173 // bitcast.
6174 if (!Op0VT.isVector()) {
6175 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6176 if (!isTypeLegal(BVT))
6177 return SDValue();
6178 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6179 DAG.getUNDEF(BVT), Op0,
6180 DAG.getVectorIdxConstant(0, DL)));
6181 }
6182 return SDValue();
6183 }
6184 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6185 // thus: bitcast the vector to a one-element vector type whose element type
6186 // is the same as the result type, and extract the first element.
6187 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6188 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6189 if (!isTypeLegal(BVT))
6190 return SDValue();
6191 SDValue BVec = DAG.getBitcast(BVT, Op0);
6192 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6193 DAG.getVectorIdxConstant(0, DL));
6194 }
6195 return SDValue();
6196 }
6197 case ISD::INTRINSIC_WO_CHAIN:
6198 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6199 case ISD::INTRINSIC_W_CHAIN:
6200 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6201 case ISD::INTRINSIC_VOID:
6202 return LowerINTRINSIC_VOID(Op, DAG);
6203 case ISD::IS_FPCLASS:
6204 return LowerIS_FPCLASS(Op, DAG);
6205 case ISD::BITREVERSE: {
6206 MVT VT = Op.getSimpleValueType();
6207 if (VT.isFixedLengthVector()) {
6208 assert(Subtarget.hasStdExtZvbb());
6209 return lowerToScalableOp(Op, DAG);
6210 }
6211 SDLoc DL(Op);
6212 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6213 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6214 // Expand bitreverse to a bswap(rev8) followed by brev8.
6215 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6216 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6217 }
6218 case ISD::TRUNCATE:
6219 // Only custom-lower vector truncates
6220 if (!Op.getSimpleValueType().isVector())
6221 return Op;
6222 return lowerVectorTruncLike(Op, DAG);
6223 case ISD::ANY_EXTEND:
6224 case ISD::ZERO_EXTEND:
6225 if (Op.getOperand(0).getValueType().isVector() &&
6226 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6227 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6228 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6229 case ISD::SIGN_EXTEND:
6230 if (Op.getOperand(0).getValueType().isVector() &&
6231 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6232 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6233 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6234 case ISD::SPLAT_VECTOR_PARTS:
6235 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6236 case ISD::INSERT_VECTOR_ELT:
6237 return lowerINSERT_VECTOR_ELT(Op, DAG);
6238 case ISD::EXTRACT_VECTOR_ELT:
6239 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6240 case ISD::SCALAR_TO_VECTOR: {
6241 MVT VT = Op.getSimpleValueType();
6242 SDLoc DL(Op);
6243 SDValue Scalar = Op.getOperand(0);
6244 if (VT.getVectorElementType() == MVT::i1) {
6245 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6246 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6247 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6248 }
6249 MVT ContainerVT = VT;
6250 if (VT.isFixedLengthVector())
6251 ContainerVT = getContainerForFixedLengthVector(VT);
6252 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6253 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6254 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6255 DAG.getUNDEF(ContainerVT), Scalar, VL);
6256 if (VT.isFixedLengthVector())
6257 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6258 return V;
6259 }
6260 case ISD::VSCALE: {
6261 MVT XLenVT = Subtarget.getXLenVT();
6262 MVT VT = Op.getSimpleValueType();
6263 SDLoc DL(Op);
6264 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6265 // We define our scalable vector types for lmul=1 to use a 64 bit known
6266 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6267 // vscale as VLENB / 8. (An integer sketch of this scaling follows the end of this case.)
6268 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6269 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6270 report_fatal_error("Support for VLEN==32 is incomplete.");
6271 // We assume VLENB is a multiple of 8. We manually choose the best shift
6272 // here because SimplifyDemandedBits isn't always able to simplify it.
6273 uint64_t Val = Op.getConstantOperandVal(0);
6274 if (isPowerOf2_64(Val)) {
6275 uint64_t Log2 = Log2_64(Val);
6276 if (Log2 < 3)
6277 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6278 DAG.getConstant(3 - Log2, DL, VT));
6279 else if (Log2 > 3)
6280 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6281 DAG.getConstant(Log2 - 3, DL, XLenVT));
6282 } else if ((Val % 8) == 0) {
6283 // If the multiplier is a multiple of 8, scale it down to avoid needing
6284 // to shift the VLENB value.
6285 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6286 DAG.getConstant(Val / 8, DL, XLenVT));
6287 } else {
6288 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6289 DAG.getConstant(3, DL, XLenVT));
6290 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6291 DAG.getConstant(Val, DL, XLenVT));
6292 }
6293 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6294 }
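// --- Illustrative sketch (not part of RISCVISelLowering.cpp) ---------------
// A standalone model of the three ways the VSCALE multiple is formed from
// VLENB in the case above, on plain integers. C++20 <bit> is assumed and the
// function name is illustrative.
#include <bit>
#include <cstdint>

uint64_t scaleVLENB(uint64_t VLENB, uint64_t Val) {
  // The result is vscale * Val == (VLENB / 8) * Val.
  if (std::has_single_bit(Val)) {    // power of two: a single shift suffices
    unsigned Log2 = std::countr_zero(Val);
    return Log2 < 3 ? VLENB >> (3 - Log2) : VLENB << (Log2 - 3);
  }
  if (Val % 8 == 0)                  // fold the /8 into the multiplier
    return VLENB * (Val / 8);
  return (VLENB >> 3) * Val;         // generic: shift, then multiply
}
// ---------------------------------------------------------------------------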
6295 case ISD::FPOWI: {
6296 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6297 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6298 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6299 Op.getOperand(1).getValueType() == MVT::i32) {
6300 SDLoc DL(Op);
6301 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6302 SDValue Powi =
6303 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6304 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6305 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6306 }
6307 return SDValue();
6308 }
6309 case ISD::FMAXIMUM:
6310 case ISD::FMINIMUM:
6311 if (Op.getValueType() == MVT::nxv32f16 &&
6312 (Subtarget.hasVInstructionsF16Minimal() &&
6313 !Subtarget.hasVInstructionsF16()))
6314 return SplitVectorOp(Op, DAG);
6315 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6316 case ISD::FP_EXTEND: {
6317 SDLoc DL(Op);
6318 EVT VT = Op.getValueType();
6319 SDValue Op0 = Op.getOperand(0);
6320 EVT Op0VT = Op0.getValueType();
6321 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6322 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6323 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6324 SDValue FloatVal =
6325 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6326 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6327 }
6328
6329 if (!Op.getValueType().isVector())
6330 return Op;
6331 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6332 }
6333 case ISD::FP_ROUND: {
6334 SDLoc DL(Op);
6335 EVT VT = Op.getValueType();
6336 SDValue Op0 = Op.getOperand(0);
6337 EVT Op0VT = Op0.getValueType();
6338 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6339 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6340 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6341 Subtarget.hasStdExtDOrZdinx()) {
6342 SDValue FloatVal =
6343 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6344 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6345 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6346 }
6347
6348 if (!Op.getValueType().isVector())
6349 return Op;
6350 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6351 }
6352 case ISD::STRICT_FP_ROUND:
6353 case ISD::STRICT_FP_EXTEND:
6354 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6355 case ISD::SINT_TO_FP:
6356 case ISD::UINT_TO_FP:
6357 if (Op.getValueType().isVector() &&
6358 Op.getValueType().getScalarType() == MVT::f16 &&
6359 (Subtarget.hasVInstructionsF16Minimal() &&
6360 !Subtarget.hasVInstructionsF16())) {
6361 if (Op.getValueType() == MVT::nxv32f16)
6362 return SplitVectorOp(Op, DAG);
6363 // int -> f32
6364 SDLoc DL(Op);
6365 MVT NVT =
6366 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6367 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6368 // f32 -> f16
6369 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6370 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6371 }
6372 [[fallthrough]];
6373 case ISD::FP_TO_SINT:
6374 case ISD::FP_TO_UINT:
6375 if (SDValue Op1 = Op.getOperand(0);
6376 Op1.getValueType().isVector() &&
6377 Op1.getValueType().getScalarType() == MVT::f16 &&
6378 (Subtarget.hasVInstructionsF16Minimal() &&
6379 !Subtarget.hasVInstructionsF16())) {
6380 if (Op1.getValueType() == MVT::nxv32f16)
6381 return SplitVectorOp(Op, DAG);
6382 // f16 -> f32
6383 SDLoc DL(Op);
6384 MVT NVT = MVT::getVectorVT(MVT::f32,
6385 Op1.getValueType().getVectorElementCount());
6386 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6387 // f32 -> int
6388 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6389 }
6390 [[fallthrough]];
6391 case ISD::STRICT_FP_TO_SINT:
6392 case ISD::STRICT_FP_TO_UINT:
6393 case ISD::STRICT_SINT_TO_FP:
6394 case ISD::STRICT_UINT_TO_FP: {
6395 // RVV can only do fp<->int conversions to types half/double the size as
6396 // the source. We custom-lower any conversions that do two hops into
6397 // sequences.
6398 MVT VT = Op.getSimpleValueType();
6399 if (!VT.isVector())
6400 return Op;
6401 SDLoc DL(Op);
6402 bool IsStrict = Op->isStrictFPOpcode();
6403 SDValue Src = Op.getOperand(0 + IsStrict);
6404 MVT EltVT = VT.getVectorElementType();
6405 MVT SrcVT = Src.getSimpleValueType();
6406 MVT SrcEltVT = SrcVT.getVectorElementType();
6407 unsigned EltSize = EltVT.getSizeInBits();
6408 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6409 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6410 "Unexpected vector element types");
6411
6412 bool IsInt2FP = SrcEltVT.isInteger();
6413 // Widening conversions
6414 if (EltSize > (2 * SrcEltSize)) {
6415 if (IsInt2FP) {
6416 // Do a regular integer sign/zero extension then convert to float.
6417 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6418 VT.getVectorElementCount());
6419 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6420 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6421 ? ISD::ZERO_EXTEND
6422 : ISD::SIGN_EXTEND;
6423 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6424 if (IsStrict)
6425 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6426 Op.getOperand(0), Ext);
6427 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6428 }
6429 // FP2Int
6430 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6431 // Do one doubling fp_extend then complete the operation by converting
6432 // to int.
6433 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6434 if (IsStrict) {
6435 auto [FExt, Chain] =
6436 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6437 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6438 }
6439 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6440 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6441 }
6442
6443 // Narrowing conversions
6444 if (SrcEltSize > (2 * EltSize)) {
6445 if (IsInt2FP) {
6446 // One narrowing int_to_fp, then an fp_round.
6447 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6448 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6449 if (IsStrict) {
6450 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6451 DAG.getVTList(InterimFVT, MVT::Other),
6452 Op.getOperand(0), Src);
6453 SDValue Chain = Int2FP.getValue(1);
6454 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6455 }
6456 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6457 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6458 }
6459 // FP2Int
6460 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6461 // representable by the integer, the result is poison.
6462 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6463 VT.getVectorElementCount());
6464 if (IsStrict) {
6465 SDValue FP2Int =
6466 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6467 Op.getOperand(0), Src);
6468 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6469 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6470 }
6471 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6472 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6473 }
6474
6475 // Scalable vectors can exit here. Patterns will handle equally-sized
6476 // conversions halving/doubling ones.
6477 if (!VT.isFixedLengthVector())
6478 return Op;
6479
6480 // For fixed-length vectors we lower to a custom "VL" node.
6481 unsigned RVVOpc = 0;
6482 switch (Op.getOpcode()) {
6483 default:
6484 llvm_unreachable("Impossible opcode");
6485 case ISD::FP_TO_SINT:
6486 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6487 break;
6488 case ISD::FP_TO_UINT:
6489 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6490 break;
6491 case ISD::SINT_TO_FP:
6492 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6493 break;
6494 case ISD::UINT_TO_FP:
6495 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6496 break;
6497 case ISD::STRICT_FP_TO_SINT:
6498 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6499 break;
6500 case ISD::STRICT_FP_TO_UINT:
6501 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6502 break;
6503 case ISD::STRICT_SINT_TO_FP:
6504 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6505 break;
6506 case ISD::STRICT_UINT_TO_FP:
6507 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6508 break;
6509 }
6510
6511 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6512 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6513 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6514 "Expected same element count");
6515
6516 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6517
6518 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6519 if (IsStrict) {
6520 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6521 Op.getOperand(0), Src, Mask, VL);
6522 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6523 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6524 }
6525 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6526 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6527 }
6528 case ISD::FP_TO_SINT_SAT:
6529 case ISD::FP_TO_UINT_SAT:
6530 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6531 case ISD::FP_TO_BF16: {
6532 // Custom lower to ensure the libcall return is passed in an FPR on hard
6533 // float ABIs.
6534 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6535 SDLoc DL(Op);
6536 MakeLibCallOptions CallOptions;
6537 RTLIB::Libcall LC =
6538 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6539 SDValue Res =
6540 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6541 if (Subtarget.is64Bit() && !RV64LegalI32)
6542 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6543 return DAG.getBitcast(MVT::i32, Res);
6544 }
6545 case ISD::BF16_TO_FP: {
6546 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6547 MVT VT = Op.getSimpleValueType();
6548 SDLoc DL(Op);
6549 Op = DAG.getNode(
6550 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6551 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6552 SDValue Res = Subtarget.is64Bit()
6553 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6554 : DAG.getBitcast(MVT::f32, Op);
6555 // fp_extend if the target VT is bigger than f32.
6556 if (VT != MVT::f32)
6557 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6558 return Res;
6559 }
6560 case ISD::FP_TO_FP16: {
6561 // Custom lower to ensure the libcall return is passed in an FPR on hard
6562 // float ABIs.
6563 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6564 SDLoc DL(Op);
6565 MakeLibCallOptions CallOptions;
6566 RTLIB::Libcall LC =
6567 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6568 SDValue Res =
6569 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6570 if (Subtarget.is64Bit() && !RV64LegalI32)
6571 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6572 return DAG.getBitcast(MVT::i32, Res);
6573 }
6574 case ISD::FP16_TO_FP: {
6575 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6576 // float ABIs.
6577 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6578 SDLoc DL(Op);
6579 MakeLibCallOptions CallOptions;
6580 SDValue Arg = Subtarget.is64Bit()
6581 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6582 Op.getOperand(0))
6583 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6584 SDValue Res =
6585 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6586 .first;
6587 return Res;
6588 }
6589 case ISD::FTRUNC:
6590 case ISD::FCEIL:
6591 case ISD::FFLOOR:
6592 case ISD::FNEARBYINT:
6593 case ISD::FRINT:
6594 case ISD::FROUND:
6595 case ISD::FROUNDEVEN:
6596 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6597 case ISD::LRINT:
6598 case ISD::LLRINT:
6599 return lowerVectorXRINT(Op, DAG, Subtarget);
6600 case ISD::VECREDUCE_ADD:
6601 case ISD::VECREDUCE_UMAX:
6602 case ISD::VECREDUCE_SMAX:
6603 case ISD::VECREDUCE_UMIN:
6604 case ISD::VECREDUCE_SMIN:
6605 return lowerVECREDUCE(Op, DAG);
6606 case ISD::VECREDUCE_AND:
6607 case ISD::VECREDUCE_OR:
6608 case ISD::VECREDUCE_XOR:
6609 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6610 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6611 return lowerVECREDUCE(Op, DAG);
6612 case ISD::VECREDUCE_FADD:
6613 case ISD::VECREDUCE_SEQ_FADD:
6614 case ISD::VECREDUCE_FMIN:
6615 case ISD::VECREDUCE_FMAX:
6616 case ISD::VECREDUCE_FMAXIMUM:
6617 case ISD::VECREDUCE_FMINIMUM:
6618 return lowerFPVECREDUCE(Op, DAG);
6619 case ISD::VP_REDUCE_ADD:
6620 case ISD::VP_REDUCE_UMAX:
6621 case ISD::VP_REDUCE_SMAX:
6622 case ISD::VP_REDUCE_UMIN:
6623 case ISD::VP_REDUCE_SMIN:
6624 case ISD::VP_REDUCE_FADD:
6625 case ISD::VP_REDUCE_SEQ_FADD:
6626 case ISD::VP_REDUCE_FMIN:
6627 case ISD::VP_REDUCE_FMAX:
6628 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6629 (Subtarget.hasVInstructionsF16Minimal() &&
6630 !Subtarget.hasVInstructionsF16()))
6631 return SplitVectorReductionOp(Op, DAG);
6632 return lowerVPREDUCE(Op, DAG);
6633 case ISD::VP_REDUCE_AND:
6634 case ISD::VP_REDUCE_OR:
6635 case ISD::VP_REDUCE_XOR:
6636 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6637 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6638 return lowerVPREDUCE(Op, DAG);
6639 case ISD::VP_CTTZ_ELTS:
6640 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6641 return lowerVPCttzElements(Op, DAG);
6642 case ISD::UNDEF: {
6643 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6644 return convertFromScalableVector(Op.getSimpleValueType(),
6645 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6646 }
6647 case ISD::INSERT_SUBVECTOR:
6648 return lowerINSERT_SUBVECTOR(Op, DAG);
6649 case ISD::EXTRACT_SUBVECTOR:
6650 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6651 case ISD::VECTOR_DEINTERLEAVE:
6652 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6653 case ISD::VECTOR_INTERLEAVE:
6654 return lowerVECTOR_INTERLEAVE(Op, DAG);
6655 case ISD::STEP_VECTOR:
6656 return lowerSTEP_VECTOR(Op, DAG);
6657 case ISD::VECTOR_REVERSE:
6658 return lowerVECTOR_REVERSE(Op, DAG);
6659 case ISD::VECTOR_SPLICE:
6660 return lowerVECTOR_SPLICE(Op, DAG);
6661 case ISD::BUILD_VECTOR:
6662 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6663 case ISD::SPLAT_VECTOR:
6664 if (Op.getValueType().getScalarType() == MVT::f16 &&
6665 (Subtarget.hasVInstructionsF16Minimal() &&
6666 !Subtarget.hasVInstructionsF16())) {
6667 if (Op.getValueType() == MVT::nxv32f16)
6668 return SplitVectorOp(Op, DAG);
6669 SDLoc DL(Op);
6670 SDValue NewScalar =
6671 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6672 SDValue NewSplat = DAG.getNode(
6673 ISD::SPLAT_VECTOR, DL,
6674 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6675 NewScalar);
6676 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6677 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6678 }
6679 if (Op.getValueType().getVectorElementType() == MVT::i1)
6680 return lowerVectorMaskSplat(Op, DAG);
6681 return SDValue();
6682 case ISD::VECTOR_SHUFFLE:
6683 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6684 case ISD::CONCAT_VECTORS: {
6685 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6686 // better than going through the stack, as the default expansion does.
6687 SDLoc DL(Op);
6688 MVT VT = Op.getSimpleValueType();
6689 MVT ContainerVT = VT;
6690 if (VT.isFixedLengthVector())
6691 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6692
6693 // Recursively split concat_vectors with more than 2 operands:
6694 //
6695 // concat_vector op1, op2, op3, op4
6696 // ->
6697 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6698 //
6699 // This reduces the length of the chain of vslideups and allows us to
6700 // perform the vslideups at a smaller LMUL, limited to MF2.
6701 if (Op.getNumOperands() > 2 &&
6702 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6703 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6705 size_t HalfNumOps = Op.getNumOperands() / 2;
6706 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6707 Op->ops().take_front(HalfNumOps));
6708 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6709 Op->ops().drop_front(HalfNumOps));
6710 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6711 }
6712
6713 unsigned NumOpElts =
6714 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6715 SDValue Vec = DAG.getUNDEF(VT);
6716 for (const auto &OpIdx : enumerate(Op->ops())) {
6717 SDValue SubVec = OpIdx.value();
6718 // Don't insert undef subvectors.
6719 if (SubVec.isUndef())
6720 continue;
6721 Vec =
6722 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6723 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6724 }
6725 return Vec;
6726 }
6727 case ISD::LOAD:
6728 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6729 return V;
6730 if (Op.getValueType().isFixedLengthVector())
6731 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6732 return Op;
6733 case ISD::STORE:
6734 if (auto V = expandUnalignedRVVStore(Op, DAG))
6735 return V;
6736 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6737 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6738 return Op;
6739 case ISD::MLOAD:
6740 case ISD::VP_LOAD:
6741 return lowerMaskedLoad(Op, DAG);
6742 case ISD::MSTORE:
6743 case ISD::VP_STORE:
6744 return lowerMaskedStore(Op, DAG);
6745 case ISD::SELECT_CC: {
6746 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6747 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6748 // into separate SETCC+SELECT just like LegalizeDAG.
6749 SDValue Tmp1 = Op.getOperand(0);
6750 SDValue Tmp2 = Op.getOperand(1);
6751 SDValue True = Op.getOperand(2);
6752 SDValue False = Op.getOperand(3);
6753 EVT VT = Op.getValueType();
6754 SDValue CC = Op.getOperand(4);
6755 EVT CmpVT = Tmp1.getValueType();
6756 EVT CCVT =
6757 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6758 SDLoc DL(Op);
6759 SDValue Cond =
6760 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6761 return DAG.getSelect(DL, VT, Cond, True, False);
6762 }
6763 case ISD::SETCC: {
6764 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6765 if (OpVT.isScalarInteger()) {
6766 MVT VT = Op.getSimpleValueType();
6767 SDValue LHS = Op.getOperand(0);
6768 SDValue RHS = Op.getOperand(1);
6769 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6770 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6771 "Unexpected CondCode");
6772
6773 SDLoc DL(Op);
6774
6775 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6776 // convert this to the equivalent of (set(u)ge X, C+1) by using
6777 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6778 // in a register.
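// For example, (setugt X, 7) becomes (xori (sltiu X, 8), 1), i.e. X >= 8 is
// computed as !(X < 8).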
6779 if (isa<ConstantSDNode>(RHS)) {
6780 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6781 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6782 // If this is an unsigned compare and the constant is -1, incrementing
6783 // the constant would change behavior. The result should be false.
6784 if (CCVal == ISD::SETUGT && Imm == -1)
6785 return DAG.getConstant(0, DL, VT);
6786 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6787 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6788 SDValue SetCC = DAG.getSetCC(
6789 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6790 return DAG.getLogicalNOT(DL, SetCC, VT);
6791 }
6792 }
6793
6794 // Not a constant we could handle, swap the operands and condition code to
6795 // SETLT/SETULT.
6796 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6797 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6798 }
6799
6800 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6801 (Subtarget.hasVInstructionsF16Minimal() &&
6802 !Subtarget.hasVInstructionsF16()))
6803 return SplitVectorOp(Op, DAG);
6804
6805 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6806 }
6807 case ISD::ADD:
6808 case ISD::SUB:
6809 case ISD::MUL:
6810 case ISD::MULHS:
6811 case ISD::MULHU:
6812 case ISD::AND:
6813 case ISD::OR:
6814 case ISD::XOR:
6815 case ISD::SDIV:
6816 case ISD::SREM:
6817 case ISD::UDIV:
6818 case ISD::UREM:
6819 case ISD::BSWAP:
6820 case ISD::CTPOP:
6821 return lowerToScalableOp(Op, DAG);
6822 case ISD::SHL:
6823 case ISD::SRA:
6824 case ISD::SRL:
6825 if (Op.getSimpleValueType().isFixedLengthVector())
6826 return lowerToScalableOp(Op, DAG);
6827 // This can be called for an i32 shift amount that needs to be promoted.
6828 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6829 "Unexpected custom legalisation");
6830 return SDValue();
6831 case ISD::FADD:
6832 case ISD::FSUB:
6833 case ISD::FMUL:
6834 case ISD::FDIV:
6835 case ISD::FNEG:
6836 case ISD::FABS:
6837 case ISD::FSQRT:
6838 case ISD::FMA:
6839 case ISD::FMINNUM:
6840 case ISD::FMAXNUM:
6841 if (Op.getValueType() == MVT::nxv32f16 &&
6842 (Subtarget.hasVInstructionsF16Minimal() &&
6843 !Subtarget.hasVInstructionsF16()))
6844 return SplitVectorOp(Op, DAG);
6845 [[fallthrough]];
6846 case ISD::AVGFLOORU:
6847 case ISD::AVGCEILU:
6848 case ISD::SMIN:
6849 case ISD::SMAX:
6850 case ISD::UMIN:
6851 case ISD::UMAX:
6852 return lowerToScalableOp(Op, DAG);
6853 case ISD::UADDSAT:
6854 case ISD::USUBSAT:
6855 if (!Op.getValueType().isVector())
6856 return lowerUADDSAT_USUBSAT(Op, DAG);
6857 return lowerToScalableOp(Op, DAG);
6858 case ISD::SADDSAT:
6859 case ISD::SSUBSAT:
6860 if (!Op.getValueType().isVector())
6861 return lowerSADDSAT_SSUBSAT(Op, DAG);
6862 return lowerToScalableOp(Op, DAG);
6863 case ISD::ABDS:
6864 case ISD::ABDU: {
6865 SDLoc dl(Op);
6866 EVT VT = Op->getValueType(0);
6867 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6868 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6869 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6870
6871 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6872 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6873 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6874 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6875 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6876 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6877 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6878 }
6879 case ISD::ABS:
6880 case ISD::VP_ABS:
6881 return lowerABS(Op, DAG);
6882 case ISD::CTLZ:
6883 case ISD::CTLZ_ZERO_UNDEF:
6884 case ISD::CTTZ:
6885 case ISD::CTTZ_ZERO_UNDEF:
6886 if (Subtarget.hasStdExtZvbb())
6887 return lowerToScalableOp(Op, DAG);
6888 assert(Op.getOpcode() != ISD::CTTZ);
6889 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6890 case ISD::VSELECT:
6891 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6892 case ISD::FCOPYSIGN:
6893 if (Op.getValueType() == MVT::nxv32f16 &&
6894 (Subtarget.hasVInstructionsF16Minimal() &&
6895 !Subtarget.hasVInstructionsF16()))
6896 return SplitVectorOp(Op, DAG);
6897 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6898 case ISD::STRICT_FADD:
6899 case ISD::STRICT_FSUB:
6900 case ISD::STRICT_FMUL:
6901 case ISD::STRICT_FDIV:
6902 case ISD::STRICT_FSQRT:
6903 case ISD::STRICT_FMA:
6904 if (Op.getValueType() == MVT::nxv32f16 &&
6905 (Subtarget.hasVInstructionsF16Minimal() &&
6906 !Subtarget.hasVInstructionsF16()))
6907 return SplitStrictFPVectorOp(Op, DAG);
6908 return lowerToScalableOp(Op, DAG);
6909 case ISD::STRICT_FSETCC:
6910 case ISD::STRICT_FSETCCS:
6911 return lowerVectorStrictFSetcc(Op, DAG);
6912 case ISD::STRICT_FCEIL:
6913 case ISD::STRICT_FRINT:
6914 case ISD::STRICT_FFLOOR:
6915 case ISD::STRICT_FTRUNC:
6916 case ISD::STRICT_FNEARBYINT:
6917 case ISD::STRICT_FROUND:
6918 case ISD::STRICT_FROUNDEVEN:
6919 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6920 case ISD::MGATHER:
6921 case ISD::VP_GATHER:
6922 return lowerMaskedGather(Op, DAG);
6923 case ISD::MSCATTER:
6924 case ISD::VP_SCATTER:
6925 return lowerMaskedScatter(Op, DAG);
6926 case ISD::GET_ROUNDING:
6927 return lowerGET_ROUNDING(Op, DAG);
6928 case ISD::SET_ROUNDING:
6929 return lowerSET_ROUNDING(Op, DAG);
6930 case ISD::EH_DWARF_CFA:
6931 return lowerEH_DWARF_CFA(Op, DAG);
6932 case ISD::VP_SELECT:
6933 case ISD::VP_MERGE:
6934 case ISD::VP_ADD:
6935 case ISD::VP_SUB:
6936 case ISD::VP_MUL:
6937 case ISD::VP_SDIV:
6938 case ISD::VP_UDIV:
6939 case ISD::VP_SREM:
6940 case ISD::VP_UREM:
6941 case ISD::VP_UADDSAT:
6942 case ISD::VP_USUBSAT:
6943 case ISD::VP_SADDSAT:
6944 case ISD::VP_SSUBSAT:
6945 case ISD::VP_LRINT:
6946 case ISD::VP_LLRINT:
6947 return lowerVPOp(Op, DAG);
6948 case ISD::VP_AND:
6949 case ISD::VP_OR:
6950 case ISD::VP_XOR:
6951 return lowerLogicVPOp(Op, DAG);
6952 case ISD::VP_FADD:
6953 case ISD::VP_FSUB:
6954 case ISD::VP_FMUL:
6955 case ISD::VP_FDIV:
6956 case ISD::VP_FNEG:
6957 case ISD::VP_FABS:
6958 case ISD::VP_SQRT:
6959 case ISD::VP_FMA:
6960 case ISD::VP_FMINNUM:
6961 case ISD::VP_FMAXNUM:
6962 case ISD::VP_FCOPYSIGN:
6963 if (Op.getValueType() == MVT::nxv32f16 &&
6964 (Subtarget.hasVInstructionsF16Minimal() &&
6965 !Subtarget.hasVInstructionsF16()))
6966 return SplitVPOp(Op, DAG);
6967 [[fallthrough]];
6968 case ISD::VP_ASHR:
6969 case ISD::VP_LSHR:
6970 case ISD::VP_SHL:
6971 return lowerVPOp(Op, DAG);
6972 case ISD::VP_IS_FPCLASS:
6973 return LowerIS_FPCLASS(Op, DAG);
6974 case ISD::VP_SIGN_EXTEND:
6975 case ISD::VP_ZERO_EXTEND:
6976 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6977 return lowerVPExtMaskOp(Op, DAG);
6978 return lowerVPOp(Op, DAG);
6979 case ISD::VP_TRUNCATE:
6980 return lowerVectorTruncLike(Op, DAG);
6981 case ISD::VP_FP_EXTEND:
6982 case ISD::VP_FP_ROUND:
6983 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6984 case ISD::VP_SINT_TO_FP:
6985 case ISD::VP_UINT_TO_FP:
6986 if (Op.getValueType().isVector() &&
6987 Op.getValueType().getScalarType() == MVT::f16 &&
6988 (Subtarget.hasVInstructionsF16Minimal() &&
6989 !Subtarget.hasVInstructionsF16())) {
6990 if (Op.getValueType() == MVT::nxv32f16)
6991 return SplitVPOp(Op, DAG);
6992 // int -> f32
6993 SDLoc DL(Op);
6994 MVT NVT =
6995 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6996 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6997 // f32 -> f16
6998 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6999 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7000 }
7001 [[fallthrough]];
7002 case ISD::VP_FP_TO_SINT:
7003 case ISD::VP_FP_TO_UINT:
7004 if (SDValue Op1 = Op.getOperand(0);
7005 Op1.getValueType().isVector() &&
7006 Op1.getValueType().getScalarType() == MVT::f16 &&
7007 (Subtarget.hasVInstructionsF16Minimal() &&
7008 !Subtarget.hasVInstructionsF16())) {
7009 if (Op1.getValueType() == MVT::nxv32f16)
7010 return SplitVPOp(Op, DAG);
7011 // f16 -> f32
7012 SDLoc DL(Op);
7013 MVT NVT = MVT::getVectorVT(MVT::f32,
7014 Op1.getValueType().getVectorElementCount());
7015 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7016 // f32 -> int
7017 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7018 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7019 }
7020 return lowerVPFPIntConvOp(Op, DAG);
7021 case ISD::VP_SETCC:
7022 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7023 (Subtarget.hasVInstructionsF16Minimal() &&
7024 !Subtarget.hasVInstructionsF16()))
7025 return SplitVPOp(Op, DAG);
7026 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7027 return lowerVPSetCCMaskOp(Op, DAG);
7028 [[fallthrough]];
7029 case ISD::VP_SMIN:
7030 case ISD::VP_SMAX:
7031 case ISD::VP_UMIN:
7032 case ISD::VP_UMAX:
7033 case ISD::VP_BITREVERSE:
7034 case ISD::VP_BSWAP:
7035 return lowerVPOp(Op, DAG);
7036 case ISD::VP_CTLZ:
7037 case ISD::VP_CTLZ_ZERO_UNDEF:
7038 if (Subtarget.hasStdExtZvbb())
7039 return lowerVPOp(Op, DAG);
7040 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7041 case ISD::VP_CTTZ:
7042 case ISD::VP_CTTZ_ZERO_UNDEF:
7043 if (Subtarget.hasStdExtZvbb())
7044 return lowerVPOp(Op, DAG);
7045 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7046 case ISD::VP_CTPOP:
7047 return lowerVPOp(Op, DAG);
7048 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7049 return lowerVPStridedLoad(Op, DAG);
7050 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7051 return lowerVPStridedStore(Op, DAG);
7052 case ISD::VP_FCEIL:
7053 case ISD::VP_FFLOOR:
7054 case ISD::VP_FRINT:
7055 case ISD::VP_FNEARBYINT:
7056 case ISD::VP_FROUND:
7057 case ISD::VP_FROUNDEVEN:
7058 case ISD::VP_FROUNDTOZERO:
7059 if (Op.getValueType() == MVT::nxv32f16 &&
7060 (Subtarget.hasVInstructionsF16Minimal() &&
7061 !Subtarget.hasVInstructionsF16()))
7062 return SplitVPOp(Op, DAG);
7063 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7064 case ISD::VP_FMAXIMUM:
7065 case ISD::VP_FMINIMUM:
7066 if (Op.getValueType() == MVT::nxv32f16 &&
7067 (Subtarget.hasVInstructionsF16Minimal() &&
7068 !Subtarget.hasVInstructionsF16()))
7069 return SplitVPOp(Op, DAG);
7070 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7071 case ISD::EXPERIMENTAL_VP_SPLICE:
7072 return lowerVPSpliceExperimental(Op, DAG);
7073 case ISD::EXPERIMENTAL_VP_REVERSE:
7074 return lowerVPReverseExperimental(Op, DAG);
7075 }
7076}
7077
7078 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7079 SelectionDAG &DAG, unsigned Flags) {
7080 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7081}
7082
7083 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7084 SelectionDAG &DAG, unsigned Flags) {
7085 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7086 Flags);
7087}
7088
7089 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7090 SelectionDAG &DAG, unsigned Flags) {
7091 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7092 N->getOffset(), Flags);
7093}
7094
7095 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7096 SelectionDAG &DAG, unsigned Flags) {
7097 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7098}
7099
7100template <class NodeTy>
7101SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7102 bool IsLocal, bool IsExternWeak) const {
7103 SDLoc DL(N);
7104 EVT Ty = getPointerTy(DAG.getDataLayout());
7105
7106 // When HWASAN is used and tagging of global variables is enabled
7107 // they should be accessed via the GOT, since the tagged address of a global
7108 // is incompatible with existing code models. This also applies to non-pic
7109 // mode.
7110 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7111 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7112 if (IsLocal && !Subtarget.allowTaggedGlobals())
7113 // Use PC-relative addressing to access the symbol. This generates the
7114 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7115 // %pcrel_lo(auipc)).
7116 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7117
7118 // Use PC-relative addressing to access the GOT for this symbol, then load
7119 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7120 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7121 SDValue Load =
7122 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7123 MachineFunction &MF = DAG.getMachineFunction();
7124 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7125 MachinePointerInfo::getGOT(MF),
7126 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7127 MachineMemOperand::MOInvariant,
7128 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7129 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7130 return Load;
7131 }
7132
7133 switch (getTargetMachine().getCodeModel()) {
7134 default:
7135 report_fatal_error("Unsupported code model for lowering");
7136 case CodeModel::Small: {
7137 // Generate a sequence for accessing addresses within the first 2 GiB of
7138 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7139 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7140 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7141 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7142 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7143 }
7144 case CodeModel::Medium: {
7145 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7146 if (IsExternWeak) {
7147 // An extern weak symbol may be undefined, i.e. have value 0, which may
7148 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7149 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7150 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7151 SDValue Load =
7152 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7153 MachineFunction &MF = DAG.getMachineFunction();
7154 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7155 MachinePointerInfo::getGOT(MF),
7156 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7157 MachineMemOperand::MOInvariant,
7158 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7159 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7160 return Load;
7161 }
7162
7163 // Generate a sequence for accessing addresses within any 2GiB range within
7164 // the address space. This generates the pattern (PseudoLLA sym), which
7165 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7166 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7167 }
7168 }
7169}
7170
7171SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7172 SelectionDAG &DAG) const {
7173 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7174 assert(N->getOffset() == 0 && "unexpected offset in global node");
7175 const GlobalValue *GV = N->getGlobal();
7176 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7177}
7178
7179SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7180 SelectionDAG &DAG) const {
7181 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7182
7183 return getAddr(N, DAG);
7184}
7185
7186SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7187 SelectionDAG &DAG) const {
7188 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7189
7190 return getAddr(N, DAG);
7191}
7192
7193SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7194 SelectionDAG &DAG) const {
7195 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7196
7197 return getAddr(N, DAG);
7198}
7199
7200SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG,
7202 bool UseGOT) const {
7203 SDLoc DL(N);
7204 EVT Ty = getPointerTy(DAG.getDataLayout());
7205 const GlobalValue *GV = N->getGlobal();
7206 MVT XLenVT = Subtarget.getXLenVT();
7207
7208 if (UseGOT) {
7209 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7210 // load the address from the GOT and add the thread pointer. This generates
7211 // the pattern (PseudoLA_TLS_IE sym), which expands to
7212 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7213 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7214 SDValue Load =
7215 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7216 MachineFunction &MF = DAG.getMachineFunction();
7217 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7218 MachinePointerInfo::getGOT(MF),
7219 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7220 MachineMemOperand::MOInvariant,
7221 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7222 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7223
7224 // Add the thread pointer.
7225 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7226 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7227 }
7228
7229 // Generate a sequence for accessing the address relative to the thread
7230 // pointer, with the appropriate adjustment for the thread pointer offset.
7231 // This generates the pattern
7232 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7233 SDValue AddrHi =
7234 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7235 SDValue AddrAdd =
7236 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7237 SDValue AddrLo =
7238 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7239
7240 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7241 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7242 SDValue MNAdd =
7243 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7244 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7245}
7246
7247SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7248 SelectionDAG &DAG) const {
7249 SDLoc DL(N);
7250 EVT Ty = getPointerTy(DAG.getDataLayout());
7251 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7252 const GlobalValue *GV = N->getGlobal();
7253
7254 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7255 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7256 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7257 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7258 SDValue Load =
7259 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7260
7261 // Prepare argument list to generate call.
7262 ArgListTy Args;
7263 ArgListEntry Entry;
7264 Entry.Node = Load;
7265 Entry.Ty = CallTy;
7266 Args.push_back(Entry);
7267
7268 // Setup call to __tls_get_addr.
7269 TargetLowering::CallLoweringInfo CLI(DAG);
7270 CLI.setDebugLoc(DL)
7271 .setChain(DAG.getEntryNode())
7272 .setLibCallee(CallingConv::C, CallTy,
7273 DAG.getExternalSymbol("__tls_get_addr", Ty),
7274 std::move(Args));
7275
7276 return LowerCallTo(CLI).first;
7277}
7278
7279SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7280 SelectionDAG &DAG) const {
7281 SDLoc DL(N);
7282 EVT Ty = getPointerTy(DAG.getDataLayout());
7283 const GlobalValue *GV = N->getGlobal();
7284
7285 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7286 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7287 //
7288 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7289 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7290 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7291 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7292 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7293 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7294}
7295
7296SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7297 SelectionDAG &DAG) const {
7298 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7299 assert(N->getOffset() == 0 && "unexpected offset in global node");
7300
7301 if (DAG.getTarget().useEmulatedTLS())
7302 return LowerToTLSEmulatedModel(N, DAG);
7303
7304 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7305
7306 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7307 CallingConv::GHC)
7308 report_fatal_error("In GHC calling convention TLS is not supported");
7309
7310 SDValue Addr;
7311 switch (Model) {
7312 case TLSModel::LocalExec:
7313 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7314 break;
7315 case TLSModel::InitialExec:
7316 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7317 break;
7318 case TLSModel::LocalDynamic:
7319 case TLSModel::GeneralDynamic:
7320 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7321 : getDynamicTLSAddr(N, DAG);
7322 break;
7323 }
7324
7325 return Addr;
7326}
7327
7328// Return true if Val is equal to (setcc LHS, RHS, CC).
7329// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7330// Otherwise, return std::nullopt.
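// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// because SETGE is the inverse of SETLT.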
7331static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7332 ISD::CondCode CC, SDValue Val) {
7333 assert(Val->getOpcode() == ISD::SETCC);
7334 SDValue LHS2 = Val.getOperand(0);
7335 SDValue RHS2 = Val.getOperand(1);
7336 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7337
7338 if (LHS == LHS2 && RHS == RHS2) {
7339 if (CC == CC2)
7340 return true;
7341 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7342 return false;
7343 } else if (LHS == RHS2 && RHS == LHS2) {
7344 CC2 = ISD::getSetCCSwappedOperands(CC2);
7345 if (CC == CC2)
7346 return true;
7347 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7348 return false;
7349 }
7350
7351 return std::nullopt;
7352}
7353
7354 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7355 const RISCVSubtarget &Subtarget) {
7356 SDValue CondV = N->getOperand(0);
7357 SDValue TrueV = N->getOperand(1);
7358 SDValue FalseV = N->getOperand(2);
7359 MVT VT = N->getSimpleValueType(0);
7360 SDLoc DL(N);
7361
7362 if (!Subtarget.hasConditionalMoveFusion()) {
7363 // (select c, -1, y) -> -c | y
7364 if (isAllOnesConstant(TrueV)) {
7365 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7366 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7367 }
7368 // (select c, y, -1) -> (c-1) | y
7369 if (isAllOnesConstant(FalseV)) {
7370 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7371 DAG.getAllOnesConstant(DL, VT));
7372 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7373 }
7374
7375 // (select c, 0, y) -> (c-1) & y
7376 if (isNullConstant(TrueV)) {
7377 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7378 DAG.getAllOnesConstant(DL, VT));
7379 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7380 }
7381 // (select c, y, 0) -> -c & y
7382 if (isNullConstant(FalseV)) {
7383 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7384 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7385 }
7386 }
7387
7388 // select c, ~x, x --> xor -c, x
7389 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7390 const APInt &TrueVal = TrueV->getAsAPIntVal();
7391 const APInt &FalseVal = FalseV->getAsAPIntVal();
7392 if (~TrueVal == FalseVal) {
7393 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7394 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7395 }
7396 }
7397
7398 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7399 // when both truev and falsev are also setcc.
7400 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7401 FalseV.getOpcode() == ISD::SETCC) {
7402 SDValue LHS = CondV.getOperand(0);
7403 SDValue RHS = CondV.getOperand(1);
7404 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7405
7406 // (select x, x, y) -> x | y
7407 // (select !x, x, y) -> x & y
7408 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7409 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7410 DAG.getFreeze(FalseV));
7411 }
7412 // (select x, y, x) -> x & y
7413 // (select !x, y, x) -> x | y
7414 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7415 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7416 DAG.getFreeze(TrueV), FalseV);
7417 }
7418 }
7419
7420 return SDValue();
7421}
7422
7423// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7424// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7425// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7426// being `0` or `-1`. In such cases we can replace `select` with `and`.
7427// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7428// than `c0`?
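// For example, (and (select cond, X, 0), 0xff) can become
// (select cond, (and X, 0xff), 0), whose false arm folds to zero.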
7429static SDValue
7430 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7431 const RISCVSubtarget &Subtarget) {
7432 if (Subtarget.hasShortForwardBranchOpt())
7433 return SDValue();
7434
7435 unsigned SelOpNo = 0;
7436 SDValue Sel = BO->getOperand(0);
7437 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7438 SelOpNo = 1;
7439 Sel = BO->getOperand(1);
7440 }
7441
7442 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7443 return SDValue();
7444
7445 unsigned ConstSelOpNo = 1;
7446 unsigned OtherSelOpNo = 2;
7447 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7448 ConstSelOpNo = 2;
7449 OtherSelOpNo = 1;
7450 }
7451 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7452 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7453 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7454 return SDValue();
7455
7456 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7457 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7458 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7459 return SDValue();
7460
7461 SDLoc DL(Sel);
7462 EVT VT = BO->getValueType(0);
7463
7464 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7465 if (SelOpNo == 1)
7466 std::swap(NewConstOps[0], NewConstOps[1]);
7467
7468 SDValue NewConstOp =
7469 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7470 if (!NewConstOp)
7471 return SDValue();
7472
7473 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7474 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7475 return SDValue();
7476
7477 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7478 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7479 if (SelOpNo == 1)
7480 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7481 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7482
7483 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7484 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7485 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7486}
7487
7488SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7489 SDValue CondV = Op.getOperand(0);
7490 SDValue TrueV = Op.getOperand(1);
7491 SDValue FalseV = Op.getOperand(2);
7492 SDLoc DL(Op);
7493 MVT VT = Op.getSimpleValueType();
7494 MVT XLenVT = Subtarget.getXLenVT();
7495
7496 // Lower vector SELECTs to VSELECTs by splatting the condition.
7497 if (VT.isVector()) {
7498 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7499 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7500 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7501 }
7502
7503 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7504 // nodes to implement the SELECT. Performing the lowering here allows for
7505 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7506 // sequence or RISCVISD::SELECT_CC node (branch-based select).
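// For reference, the Zicond semantics are:
//   czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
//   czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1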
7507 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7508 VT.isScalarInteger()) {
7509 // (select c, t, 0) -> (czero_eqz t, c)
7510 if (isNullConstant(FalseV))
7511 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7512 // (select c, 0, f) -> (czero_nez f, c)
7513 if (isNullConstant(TrueV))
7514 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7515
7516 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7517 if (TrueV.getOpcode() == ISD::AND &&
7518 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7519 return DAG.getNode(
7520 ISD::OR, DL, VT, TrueV,
7521 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7522 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7523 if (FalseV.getOpcode() == ISD::AND &&
7524 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7525 return DAG.getNode(
7526 ISD::OR, DL, VT, FalseV,
7527 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7528
7529 // Try some other optimizations before falling back to generic lowering.
7530 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7531 return V;
7532
7533 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7534 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
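// For example, (select c, 5, 3) can become (add (czero_nez -2, c), 5), which
// yields 5 when c is non-zero and 3 when c is zero.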
7535 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7536 const APInt &TrueVal = TrueV->getAsAPIntVal();
7537 const APInt &FalseVal = FalseV->getAsAPIntVal();
7538 const int TrueValCost = RISCVMatInt::getIntMatCost(
7539 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7540 const int FalseValCost = RISCVMatInt::getIntMatCost(
7541 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7542 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7543 SDValue LHSVal = DAG.getConstant(
7544 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7545 SDValue RHSVal =
7546 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7547 SDValue CMOV =
7548 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7549 DL, VT, LHSVal, CondV);
7550 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7551 }
7552
7553 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7554 // Unless we have the short forward branch optimization.
7555 if (!Subtarget.hasConditionalMoveFusion())
7556 return DAG.getNode(
7557 ISD::OR, DL, VT,
7558 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7559 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7560 }
7561
7562 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7563 return V;
7564
7565 if (Op.hasOneUse()) {
7566 unsigned UseOpc = Op->use_begin()->getOpcode();
7567 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7568 SDNode *BinOp = *Op->use_begin();
7569 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7570 DAG, Subtarget)) {
7571 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7572 return lowerSELECT(NewSel, DAG);
7573 }
7574 }
7575 }
7576
7577 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7578 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7579 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7580 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7581 if (FPTV && FPFV) {
7582 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7583 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7584 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7585 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7586 DAG.getConstant(1, DL, XLenVT));
7587 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7588 }
7589 }
7590
7591 // If the condition is not an integer SETCC which operates on XLenVT, we need
7592 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7593 // (select condv, truev, falsev)
7594 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7595 if (CondV.getOpcode() != ISD::SETCC ||
7596 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7597 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7598 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7599
7600 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7601
7602 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7603 }
7604
7605 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7606 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7607 // advantage of the integer compare+branch instructions. i.e.:
7608 // (select (setcc lhs, rhs, cc), truev, falsev)
7609 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7610 SDValue LHS = CondV.getOperand(0);
7611 SDValue RHS = CondV.getOperand(1);
7612 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7613
7614 // Special case for a select of 2 constants that have a difference of 1.
7615 // Normally this is done by DAGCombine, but if the select is introduced by
7616 // type legalization or op legalization, we miss it. Restricting to SETLT
7617 // case for now because that is what signed saturating add/sub need.
7618 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7619 // but we would probably want to swap the true/false values if the condition
7620 // is SETGE/SETLE to avoid an XORI.
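// For example, (select (setlt a, b), 6, 5) becomes (add (setlt a, b), 5),
// since the setcc result is either 0 or 1.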
7621 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7622 CCVal == ISD::SETLT) {
7623 const APInt &TrueVal = TrueV->getAsAPIntVal();
7624 const APInt &FalseVal = FalseV->getAsAPIntVal();
7625 if (TrueVal - 1 == FalseVal)
7626 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7627 if (TrueVal + 1 == FalseVal)
7628 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7629 }
7630
7631 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7632 // 1 < x ? x : 1 -> 0 < x ? x : 1
7633 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7634 RHS == TrueV && LHS == FalseV) {
7635 LHS = DAG.getConstant(0, DL, VT);
7636 // 0 <u x is the same as x != 0.
7637 if (CCVal == ISD::SETULT) {
7638 std::swap(LHS, RHS);
7639 CCVal = ISD::SETNE;
7640 }
7641 }
7642
7643 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7644 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7645 RHS == FalseV) {
7646 RHS = DAG.getConstant(0, DL, VT);
7647 }
7648
7649 SDValue TargetCC = DAG.getCondCode(CCVal);
7650
7651 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7652 // (select (setcc lhs, rhs, CC), constant, falsev)
7653 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7654 std::swap(TrueV, FalseV);
7655 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7656 }
7657
7658 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7659 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7660}
7661
7662SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7663 SDValue CondV = Op.getOperand(1);
7664 SDLoc DL(Op);
7665 MVT XLenVT = Subtarget.getXLenVT();
7666
7667 if (CondV.getOpcode() == ISD::SETCC &&
7668 CondV.getOperand(0).getValueType() == XLenVT) {
7669 SDValue LHS = CondV.getOperand(0);
7670 SDValue RHS = CondV.getOperand(1);
7671 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7672
7673 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7674
7675 SDValue TargetCC = DAG.getCondCode(CCVal);
7676 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7677 LHS, RHS, TargetCC, Op.getOperand(2));
7678 }
7679
7680 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7681 CondV, DAG.getConstant(0, DL, XLenVT),
7682 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7683}
7684
7685SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7686 MachineFunction &MF = DAG.getMachineFunction();
7687 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7688
7689 SDLoc DL(Op);
7690 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7691 getPointerTy(MF.getDataLayout()));
7692
7693 // vastart just stores the address of the VarArgsFrameIndex slot into the
7694 // memory location argument.
7695 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7696 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7697 MachinePointerInfo(SV));
7698}
7699
7700SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7701 SelectionDAG &DAG) const {
7702 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7703 MachineFunction &MF = DAG.getMachineFunction();
7704 MachineFrameInfo &MFI = MF.getFrameInfo();
7705 MFI.setFrameAddressIsTaken(true);
7706 Register FrameReg = RI.getFrameRegister(MF);
7707 int XLenInBytes = Subtarget.getXLen() / 8;
7708
7709 EVT VT = Op.getValueType();
7710 SDLoc DL(Op);
7711 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7712 unsigned Depth = Op.getConstantOperandVal(0);
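// Walk the frame chain: each iteration reloads the caller's saved frame
// pointer, which this lowering assumes is stored at
// FrameAddr - 2 * XLenInBytes in the frame record.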
7713 while (Depth--) {
7714 int Offset = -(XLenInBytes * 2);
7715 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7716 DAG.getIntPtrConstant(Offset, DL));
7717 FrameAddr =
7718 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7719 }
7720 return FrameAddr;
7721}
7722
7723SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7724 SelectionDAG &DAG) const {
7725 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7726 MachineFunction &MF = DAG.getMachineFunction();
7727 MachineFrameInfo &MFI = MF.getFrameInfo();
7728 MFI.setReturnAddressIsTaken(true);
7729 MVT XLenVT = Subtarget.getXLenVT();
7730 int XLenInBytes = Subtarget.getXLen() / 8;
7731
7732 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7733 return SDValue();
7734
7735 EVT VT = Op.getValueType();
7736 SDLoc DL(Op);
7737 unsigned Depth = Op.getConstantOperandVal(0);
7738 if (Depth) {
7739 int Off = -XLenInBytes;
7740 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7741 SDValue Offset = DAG.getConstant(Off, DL, VT);
7742 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7743 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7744 MachinePointerInfo());
7745 }
7746
7747 // Return the value of the return address register, marking it an implicit
7748 // live-in.
7749 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7750 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7751}
7752
7753SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7754 SelectionDAG &DAG) const {
7755 SDLoc DL(Op);
7756 SDValue Lo = Op.getOperand(0);
7757 SDValue Hi = Op.getOperand(1);
7758 SDValue Shamt = Op.getOperand(2);
7759 EVT VT = Lo.getValueType();
7760
7761 // if Shamt-XLEN < 0: // Shamt < XLEN
7762 // Lo = Lo << Shamt
7763 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7764 // else:
7765 // Lo = 0
7766 // Hi = Lo << (Shamt-XLEN)
7767
7768 SDValue Zero = DAG.getConstant(0, DL, VT);
7769 SDValue One = DAG.getConstant(1, DL, VT);
7770 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7771 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7772 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7773 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7774
7775 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7776 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7777 SDValue ShiftRightLo =
7778 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7779 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7780 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7781 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7782
7783 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7784
7785 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7786 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7787
7788 SDValue Parts[2] = {Lo, Hi};
7789 return DAG.getMergeValues(Parts, DL);
7790}
7791
7792SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7793 bool IsSRA) const {
7794 SDLoc DL(Op);
7795 SDValue Lo = Op.getOperand(0);
7796 SDValue Hi = Op.getOperand(1);
7797 SDValue Shamt = Op.getOperand(2);
7798 EVT VT = Lo.getValueType();
7799
7800 // SRA expansion:
7801 // if Shamt-XLEN < 0: // Shamt < XLEN
7802 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7803 // Hi = Hi >>s Shamt
7804 // else:
7805 // Lo = Hi >>s (Shamt-XLEN);
7806 // Hi = Hi >>s (XLEN-1)
7807 //
7808 // SRL expansion:
7809 // if Shamt-XLEN < 0: // Shamt < XLEN
7810 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7811 // Hi = Hi >>u Shamt
7812 // else:
7813 // Lo = Hi >>u (Shamt-XLEN);
7814 // Hi = 0;
7815
7816 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7817
7818 SDValue Zero = DAG.getConstant(0, DL, VT);
7819 SDValue One = DAG.getConstant(1, DL, VT);
7820 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7821 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7822 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7823 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7824
7825 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7826 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7827 SDValue ShiftLeftHi =
7828 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7829 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7830 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7831 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7832 SDValue HiFalse =
7833 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7834
7835 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7836
7837 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7838 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7839
7840 SDValue Parts[2] = {Lo, Hi};
7841 return DAG.getMergeValues(Parts, DL);
7842}
7843
7844// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7845// legal equivalently-sized i8 type, so we can use that as a go-between.
7846SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7847 SelectionDAG &DAG) const {
7848 SDLoc DL(Op);
7849 MVT VT = Op.getSimpleValueType();
7850 SDValue SplatVal = Op.getOperand(0);
7851 // All-zeros or all-ones splats are handled specially.
7852 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7853 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7854 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7855 }
7856 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7857 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7858 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7859 }
7860 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7861 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7862 DAG.getConstant(1, DL, SplatVal.getValueType()));
7863 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7864 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7865 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7866}
7867
7868// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7869// illegal (currently only vXi64 RV32).
7870// FIXME: We could also catch non-constant sign-extended i32 values and lower
7871// them to VMV_V_X_VL.
7872SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7873 SelectionDAG &DAG) const {
7874 SDLoc DL(Op);
7875 MVT VecVT = Op.getSimpleValueType();
7876 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7877 "Unexpected SPLAT_VECTOR_PARTS lowering");
7878
7879 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7880 SDValue Lo = Op.getOperand(0);
7881 SDValue Hi = Op.getOperand(1);
7882
7883 MVT ContainerVT = VecVT;
7884 if (VecVT.isFixedLengthVector())
7885 ContainerVT = getContainerForFixedLengthVector(VecVT);
7886
7887 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7888
7889 SDValue Res =
7890 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7891
7892 if (VecVT.isFixedLengthVector())
7893 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7894
7895 return Res;
7896}
7897
7898// Custom-lower extensions from mask vectors by using a vselect either with 1
7899// for zero/any-extension or -1 for sign-extension:
7900// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7901// Note that any-extension is lowered identically to zero-extension.
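// For example, (zext nxv2i1 %m to nxv2i32) becomes
// (vselect %m, (splat_vector 1), (splat_vector 0)).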
7902SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7903 int64_t ExtTrueVal) const {
7904 SDLoc DL(Op);
7905 MVT VecVT = Op.getSimpleValueType();
7906 SDValue Src = Op.getOperand(0);
7907 // Only custom-lower extensions from mask types
7908 assert(Src.getValueType().isVector() &&
7909 Src.getValueType().getVectorElementType() == MVT::i1);
7910
7911 if (VecVT.isScalableVector()) {
7912 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7913 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7914 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7915 }
7916
7917 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7918 MVT I1ContainerVT =
7919 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7920
7921 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7922
7923 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7924
7925 MVT XLenVT = Subtarget.getXLenVT();
7926 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7927 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7928
7929 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7930 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7931 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7932 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7933 SDValue Select =
7934 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7935 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7936
7937 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7938}
7939
7940SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7941 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7942 MVT ExtVT = Op.getSimpleValueType();
7943 // Only custom-lower extensions from fixed-length vector types.
7944 if (!ExtVT.isFixedLengthVector())
7945 return Op;
7946 MVT VT = Op.getOperand(0).getSimpleValueType();
7947 // Grab the canonical container type for the extended type. Infer the smaller
7948 // type from that to ensure the same number of vector elements, as we know
7949 // the LMUL will be sufficient to hold the smaller type.
7950 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7951 // Get the extended container type manually to ensure the same number of
7952 // vector elements between source and dest.
7953 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7954 ContainerExtVT.getVectorElementCount());
7955
7956 SDValue Op1 =
7957 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7958
7959 SDLoc DL(Op);
7960 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7961
7962 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7963
7964 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7965}
7966
7967// Custom-lower truncations from vectors to mask vectors by using a mask and a
7968// setcc operation:
7969// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7970SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7971 SelectionDAG &DAG) const {
7972 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7973 SDLoc DL(Op);
7974 EVT MaskVT = Op.getValueType();
7975 // Only expect to custom-lower truncations to mask types
7976 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7977 "Unexpected type for vector mask lowering");
7978 SDValue Src = Op.getOperand(0);
7979 MVT VecVT = Src.getSimpleValueType();
7980 SDValue Mask, VL;
7981 if (IsVPTrunc) {
7982 Mask = Op.getOperand(1);
7983 VL = Op.getOperand(2);
7984 }
7985 // If this is a fixed vector, we need to convert it to a scalable vector.
7986 MVT ContainerVT = VecVT;
7987
7988 if (VecVT.isFixedLengthVector()) {
7989 ContainerVT = getContainerForFixedLengthVector(VecVT);
7990 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7991 if (IsVPTrunc) {
7992 MVT MaskContainerVT =
7993 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7994 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7995 }
7996 }
7997
7998 if (!IsVPTrunc) {
7999 std::tie(Mask, VL) =
8000 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8001 }
8002
8003 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8004 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8005
8006 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8007 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8008 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8009 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8010
8011 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8012 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8013 DAG.getUNDEF(ContainerVT), Mask, VL);
8014 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8015 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8016 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8017 if (MaskVT.isFixedLengthVector())
8018 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8019 return Trunc;
8020}
8021
8022SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8023 SelectionDAG &DAG) const {
8024 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8025 SDLoc DL(Op);
8026
8027 MVT VT = Op.getSimpleValueType();
8028 // Only custom-lower vector truncates
8029 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8030
8031 // Truncates to mask types are handled differently
8032 if (VT.getVectorElementType() == MVT::i1)
8033 return lowerVectorMaskTruncLike(Op, DAG);
8034
8035 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8036 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8037 // truncate by one power of two at a time.
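// For example, an i64 -> i8 element truncate is emitted as three narrowing
// steps: i64 -> i32 -> i16 -> i8.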
8038 MVT DstEltVT = VT.getVectorElementType();
8039
8040 SDValue Src = Op.getOperand(0);
8041 MVT SrcVT = Src.getSimpleValueType();
8042 MVT SrcEltVT = SrcVT.getVectorElementType();
8043
8044 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8045 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8046 "Unexpected vector truncate lowering");
8047
8048 MVT ContainerVT = SrcVT;
8049 SDValue Mask, VL;
8050 if (IsVPTrunc) {
8051 Mask = Op.getOperand(1);
8052 VL = Op.getOperand(2);
8053 }
8054 if (SrcVT.isFixedLengthVector()) {
8055 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8056 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8057 if (IsVPTrunc) {
8058 MVT MaskVT = getMaskTypeFor(ContainerVT);
8059 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8060 }
8061 }
8062
8063 SDValue Result = Src;
8064 if (!IsVPTrunc) {
8065 std::tie(Mask, VL) =
8066 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8067 }
8068
8069 LLVMContext &Context = *DAG.getContext();
8070 const ElementCount Count = ContainerVT.getVectorElementCount();
8071 do {
8072 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8073 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8074 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8075 Mask, VL);
8076 } while (SrcEltVT != DstEltVT);
8077
8078 if (SrcVT.isFixedLengthVector())
8079 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8080
8081 return Result;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 SDLoc DL(Op);
8088 SDValue Chain = Op.getOperand(0);
8089 SDValue Src = Op.getOperand(1);
8090 MVT VT = Op.getSimpleValueType();
8091 MVT SrcVT = Src.getSimpleValueType();
8092 MVT ContainerVT = VT;
8093 if (VT.isFixedLengthVector()) {
8094 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8095 ContainerVT =
8096 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8097 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8098 }
8099
8100 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8101
8102 // RVV can only widen/truncate fp to types double/half the size of the source.
8103 if ((VT.getVectorElementType() == MVT::f64 &&
8104 SrcVT.getVectorElementType() == MVT::f16) ||
8105 (VT.getVectorElementType() == MVT::f16 &&
8106 SrcVT.getVectorElementType() == MVT::f64)) {
8107 // For double rounding, the intermediate rounding should be round-to-odd.
8108 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8109 ? RISCVISD::STRICT_FP_EXTEND_VL
8110 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8111 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8112 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8113 Chain, Src, Mask, VL);
8114 Chain = Src.getValue(1);
8115 }
8116
8117 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8118 ? RISCVISD::STRICT_FP_EXTEND_VL
8119 : RISCVISD::STRICT_FP_ROUND_VL;
8120 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8121 Chain, Src, Mask, VL);
8122 if (VT.isFixedLengthVector()) {
8123 // StrictFP operations have two result values. Their lowered result should
8124 // have the same result count.
8125 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8126 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8127 }
8128 return Res;
8129}
8130
8131SDValue
8132RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8133 SelectionDAG &DAG) const {
8134 bool IsVP =
8135 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8136 bool IsExtend =
8137 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8138 // RVV can only truncate fp to types half the size of the source. We
8139 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8140 // conversion instruction.
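// For example, an f64 -> f16 round is emitted as f64 -> f32 with
// round-to-odd, followed by an f32 -> f16 round.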
8141 SDLoc DL(Op);
8142 MVT VT = Op.getSimpleValueType();
8143
8144 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8145
8146 SDValue Src = Op.getOperand(0);
8147 MVT SrcVT = Src.getSimpleValueType();
8148
8149 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8150 SrcVT.getVectorElementType() != MVT::f16);
8151 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8152 SrcVT.getVectorElementType() != MVT::f64);
8153
8154 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8155
8156 // Prepare any fixed-length vector operands.
8157 MVT ContainerVT = VT;
8158 SDValue Mask, VL;
8159 if (IsVP) {
8160 Mask = Op.getOperand(1);
8161 VL = Op.getOperand(2);
8162 }
8163 if (VT.isFixedLengthVector()) {
8164 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8165 ContainerVT =
8166 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8167 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8168 if (IsVP) {
8169 MVT MaskVT = getMaskTypeFor(ContainerVT);
8170 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8171 }
8172 }
8173
8174 if (!IsVP)
8175 std::tie(Mask, VL) =
8176 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8177
8178 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8179
8180 if (IsDirectConv) {
8181 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8182 if (VT.isFixedLengthVector())
8183 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8184 return Src;
8185 }
8186
8187 unsigned InterConvOpc =
8188 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8189
8190 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8191 SDValue IntermediateConv =
8192 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8193 SDValue Result =
8194 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8195 if (VT.isFixedLengthVector())
8196 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8197 return Result;
8198}
8199
8200// Given a scalable vector type and an index into it, returns the type for the
8201// smallest subvector that the index fits in. This can be used to reduce LMUL
8202// for operations like vslidedown.
8203//
8204// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8205static std::optional<MVT>
8206getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8207 const RISCVSubtarget &Subtarget) {
8208 assert(VecVT.isScalableVector());
8209 const unsigned EltSize = VecVT.getScalarSizeInBits();
8210 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8211 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8212 MVT SmallerVT;
8213 if (MaxIdx < MinVLMAX)
8214 SmallerVT = getLMUL1VT(VecVT);
8215 else if (MaxIdx < MinVLMAX * 2)
8216 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8217 else if (MaxIdx < MinVLMAX * 4)
8218 SmallerVT = getLMUL1VT(VecVT)
8219 .getDoubleNumVectorElementsVT()
8220 .getDoubleNumVectorElementsVT();
8221 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8222 return std::nullopt;
8223 return SmallerVT;
8224}
8225
8226// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8227// first position of a vector, and that vector is slid up to the insert index.
8228// By limiting the active vector length to index+1 and merging with the
8229// original vector (with an undisturbed tail policy for elements >= VL), we
8230// achieve the desired result of leaving all elements untouched except the one
8231// at VL-1, which is replaced with the desired value.
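// Illustrative sketch (assumed, simplified): inserting a value at index 2 of
// a v4i32 roughly becomes
//   vsetivli zero, 3, e32, m1, tu, ma  ; VL = idx + 1, tail undisturbed
//   vmv.s.x  v9, a0                    ; value into element 0 of a temporary
//   vslideup.vi v8, v9, 2              ; slide it up to the insert index
// The exact vsetvli/policy is chosen later by the InsertVSETVLI pass.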
8232SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8233 SelectionDAG &DAG) const {
8234 SDLoc DL(Op);
8235 MVT VecVT = Op.getSimpleValueType();
8236 SDValue Vec = Op.getOperand(0);
8237 SDValue Val = Op.getOperand(1);
8238 SDValue Idx = Op.getOperand(2);
8239
8240 if (VecVT.getVectorElementType() == MVT::i1) {
8241 // FIXME: For now we just promote to an i8 vector and insert into that,
8242 // but this is probably not optimal.
8243 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8244 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8245 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8246 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8247 }
8248
8249 MVT ContainerVT = VecVT;
8250 // If the operand is a fixed-length vector, convert to a scalable one.
8251 if (VecVT.isFixedLengthVector()) {
8252 ContainerVT = getContainerForFixedLengthVector(VecVT);
8253 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8254 }
8255
8256 // If we know the index we're going to insert at, we can shrink Vec so that
8257 // we're performing the scalar inserts and slideup on a smaller LMUL.
8258 MVT OrigContainerVT = ContainerVT;
8259 SDValue OrigVec = Vec;
8260 SDValue AlignedIdx;
8261 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8262 const unsigned OrigIdx = IdxC->getZExtValue();
8263 // Do we know an upper bound on LMUL?
8264 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8265 DL, DAG, Subtarget)) {
8266 ContainerVT = *ShrunkVT;
8267 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8268 }
8269
8270 // If we're compiling for an exact VLEN value, we can always perform
8271 // the insert in m1 as we can determine the register corresponding to
8272 // the index in the register group.
8273 const MVT M1VT = getLMUL1VT(ContainerVT);
8274 if (auto VLEN = Subtarget.getRealVLen();
8275 VLEN && ContainerVT.bitsGT(M1VT)) {
8276 EVT ElemVT = VecVT.getVectorElementType();
8277 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8278 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8279 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8280 unsigned ExtractIdx =
8281 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8282 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8283 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8284 ContainerVT = M1VT;
8285 }
8286
8287 if (AlignedIdx)
8288 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8289 AlignedIdx);
8290 }
8291
8292 MVT XLenVT = Subtarget.getXLenVT();
8293
8294 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8295 // Even i64-element vectors on RV32 can be lowered without scalar
8296 // legalization if the most-significant 32 bits of the value are not affected
8297 // by the sign-extension of the lower 32 bits.
8298 // TODO: We could also catch sign extensions of a 32-bit value.
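// For example, inserting the i64 constant -1 on RV32 takes this path:
// isInt<32>(-1) holds, so the insert proceeds with a 32-bit constant and
// relies on vmv.s.x sign-extending the scalar when SEW > XLEN.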
8299 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8300 const auto *CVal = cast<ConstantSDNode>(Val);
8301 if (isInt<32>(CVal->getSExtValue())) {
8302 IsLegalInsert = true;
8303 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8304 }
8305 }
8306
8307 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8308
8309 SDValue ValInVec;
8310
8311 if (IsLegalInsert) {
8312 unsigned Opc =
8313 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8314 if (isNullConstant(Idx)) {
8315 if (!VecVT.isFloatingPoint())
8316 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8317 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8318
8319 if (AlignedIdx)
8320 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8321 Vec, AlignedIdx);
8322 if (!VecVT.isFixedLengthVector())
8323 return Vec;
8324 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8325 }
8326 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8327 } else {
8328 // On RV32, i64-element vectors must be specially handled to place the
8329 // value at element 0, by using two vslide1down instructions in sequence on
8330 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8331 // this.
8332 SDValue ValLo, ValHi;
8333 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8334 MVT I32ContainerVT =
8335 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8336 SDValue I32Mask =
8337 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8338 // Limit the active VL to two.
8339 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8340 // If the Idx is 0 we can insert directly into the vector.
8341 if (isNullConstant(Idx)) {
8342 // First slide in the lo value, then the hi value above it. We use slide1down
8343 // to avoid the register group overlap constraint of vslide1up.
8344 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8345 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8346 // If the source vector is undef don't pass along the tail elements from
8347 // the previous slide1down.
8348 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8349 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8350 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8351 // Bitcast back to the right container type.
8352 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8353
8354 if (AlignedIdx)
8355 ValInVec =
8356 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8357 ValInVec, AlignedIdx);
8358 if (!VecVT.isFixedLengthVector())
8359 return ValInVec;
8360 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8361 }
8362
8363 // First slide in the lo value, then the hi value above it. We use slide1down
8364 // to avoid the register group overlap constraint of vslide1up.
8365 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8366 DAG.getUNDEF(I32ContainerVT),
8367 DAG.getUNDEF(I32ContainerVT), ValLo,
8368 I32Mask, InsertI64VL);
8369 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8370 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8371 I32Mask, InsertI64VL);
8372 // Bitcast back to the right container type.
8373 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8374 }
8375
8376 // Now that the value is in a vector, slide it into position.
8377 SDValue InsertVL =
8378 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8379
8380 // Use tail agnostic policy if Idx is the last index of Vec.
8381 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8382 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8383 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8384 Policy = RISCVII::TAIL_AGNOSTIC;
8385 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8386 Idx, Mask, InsertVL, Policy);
8387
8388 if (AlignedIdx)
8389 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8390 Slideup, AlignedIdx);
8391 if (!VecVT.isFixedLengthVector())
8392 return Slideup;
8393 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8394}
8395
8396// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8397// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8398// types this is done using VMV_X_S to allow us to glean information about the
8399// sign bits of the result.
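// Illustrative sketch (assumed, simplified): extracting index 2 of a v4i32
// roughly becomes
//   vslidedown.vi v9, v8, 2
//   vmv.x.s       a0, v9
// with the slidedown omitted entirely when the index is 0.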
8400SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8401 SelectionDAG &DAG) const {
8402 SDLoc DL(Op);
8403 SDValue Idx = Op.getOperand(1);
8404 SDValue Vec = Op.getOperand(0);
8405 EVT EltVT = Op.getValueType();
8406 MVT VecVT = Vec.getSimpleValueType();
8407 MVT XLenVT = Subtarget.getXLenVT();
8408
8409 if (VecVT.getVectorElementType() == MVT::i1) {
8410 // Use vfirst.m to extract the first bit.
8411 if (isNullConstant(Idx)) {
8412 MVT ContainerVT = VecVT;
8413 if (VecVT.isFixedLengthVector()) {
8414 ContainerVT = getContainerForFixedLengthVector(VecVT);
8415 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8416 }
8417 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8418 SDValue Vfirst =
8419 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8420 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8421 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8422 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8423 }
8424 if (VecVT.isFixedLengthVector()) {
8425 unsigned NumElts = VecVT.getVectorNumElements();
8426 if (NumElts >= 8) {
8427 MVT WideEltVT;
8428 unsigned WidenVecLen;
8429 SDValue ExtractElementIdx;
8430 SDValue ExtractBitIdx;
8431 unsigned MaxEEW = Subtarget.getELen();
8432 MVT LargestEltVT = MVT::getIntegerVT(
8433 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8434 if (NumElts <= LargestEltVT.getSizeInBits()) {
8435 assert(isPowerOf2_32(NumElts) &&
8436 "the number of elements should be power of 2");
8437 WideEltVT = MVT::getIntegerVT(NumElts);
8438 WidenVecLen = 1;
8439 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8440 ExtractBitIdx = Idx;
8441 } else {
8442 WideEltVT = LargestEltVT;
8443 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8444 // extract element index = index / element width
8445 ExtractElementIdx = DAG.getNode(
8446 ISD::SRL, DL, XLenVT, Idx,
8447 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8448 // mask bit index = index % element width
8449 ExtractBitIdx = DAG.getNode(
8450 ISD::AND, DL, XLenVT, Idx,
8451 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8452 }
8453 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8454 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8455 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8456 Vec, ExtractElementIdx);
8457 // Extract the bit from GPR.
8458 SDValue ShiftRight =
8459 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8460 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8461 DAG.getConstant(1, DL, XLenVT));
8462 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8463 }
8464 }
8465 // Otherwise, promote to an i8 vector and extract from that.
8466 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8467 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8468 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8469 }
8470
8471 // If this is a fixed vector, we need to convert it to a scalable vector.
8472 MVT ContainerVT = VecVT;
8473 if (VecVT.isFixedLengthVector()) {
8474 ContainerVT = getContainerForFixedLengthVector(VecVT);
8475 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8476 }
8477
8478 // If we're compiling for an exact VLEN value and we have a known
8479 // constant index, we can always perform the extract in m1 (or
8480 // smaller) as we can determine the register corresponding to
8481 // the index in the register group.
8482 const auto VLen = Subtarget.getRealVLen();
8483 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8484 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8485 MVT M1VT = getLMUL1VT(ContainerVT);
8486 unsigned OrigIdx = IdxC->getZExtValue();
8487 EVT ElemVT = VecVT.getVectorElementType();
8488 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8489 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8490 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8491 unsigned ExtractIdx =
8492 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8493 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8494 DAG.getVectorIdxConstant(ExtractIdx, DL));
8495 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8496 ContainerVT = M1VT;
8497 }
8498
8499 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8500 // contains our index.
8501 std::optional<uint64_t> MaxIdx;
8502 if (VecVT.isFixedLengthVector())
8503 MaxIdx = VecVT.getVectorNumElements() - 1;
8504 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8505 MaxIdx = IdxC->getZExtValue();
8506 if (MaxIdx) {
8507 if (auto SmallerVT =
8508 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8509 ContainerVT = *SmallerVT;
8510 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8511 DAG.getConstant(0, DL, XLenVT));
8512 }
8513 }
8514
8515 // If after narrowing, the required slide is still greater than LMUL2,
8516 // fall back to generic expansion and go through the stack. This is done
8517 // for a subtle reason: extracting *all* elements out of a vector is
8518 // widely expected to be linear in vector size, but because vslidedown
8519 // is linear in LMUL, performing N extracts using vslidedown becomes
8520 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8521 // seems to have the same problem (the store is linear in LMUL), but the
8522 // generic expansion *memoizes* the store, and thus for many extracts of
8523 // the same vector we end up with one store and a bunch of loads.
8524 // TODO: We don't have the same code for insert_vector_elt because we
8525 // have BUILD_VECTOR and handle the degenerate case there. Should we
8526 // consider adding an inverse BUILD_VECTOR node?
8527 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8528 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8529 return SDValue();
8530
8531 // If the index is 0, the vector is already in the right position.
8532 if (!isNullConstant(Idx)) {
8533 // Use a VL of 1 to avoid processing more elements than we need.
8534 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8535 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8536 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8537 }
8538
8539 if (!EltVT.isInteger()) {
8540 // Floating-point extracts are handled in TableGen.
8541 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8542 DAG.getVectorIdxConstant(0, DL));
8543 }
8544
8545 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8546 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8547}
8548
8549// Some RVV intrinsics may claim that they want an integer operand to be
8550// promoted or expanded.
8551 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8552 const RISCVSubtarget &Subtarget) {
8553 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8554 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8555 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8556 "Unexpected opcode");
8557
8558 if (!Subtarget.hasVInstructions())
8559 return SDValue();
8560
8561 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8562 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8563 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8564
8565 SDLoc DL(Op);
8566
8567 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8568 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8569 if (!II || !II->hasScalarOperand())
8570 return SDValue();
8571
8572 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8573 assert(SplatOp < Op.getNumOperands());
8574
8575 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8576 SDValue &ScalarOp = Operands[SplatOp];
8577 MVT OpVT = ScalarOp.getSimpleValueType();
8578 MVT XLenVT = Subtarget.getXLenVT();
8579
8580 // If this isn't a scalar, or its type is XLenVT we're done.
8581 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8582 return SDValue();
8583
8584 // Simplest case is that the operand needs to be promoted to XLenVT.
8585 if (OpVT.bitsLT(XLenVT)) {
8586 // If the operand is a constant, sign extend to increase our chances
8587 // of being able to use a .vi instruction. ANY_EXTEND would become a
8588 // zero extend and the simm5 check in isel would fail.
8589 // FIXME: Should we ignore the upper bits in isel instead?
8590 unsigned ExtOpc =
8591 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8592 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8593 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8594 }
8595
8596 // Use the previous operand to get the vXi64 VT. The result might be a mask
8597 // VT for compares. Using the previous operand assumes that the previous
8598 // operand will never have a smaller element size than a scalar operand and
8599 // that a widening operation never uses SEW=64.
8600 // NOTE: If this fails the below assert, we can probably just find the
8601 // element count from any operand or result and use it to construct the VT.
8602 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8603 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8604
8605 // The more complex case is when the scalar is larger than XLenVT.
8606 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8607 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8608
8609 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8610 // instruction to sign-extend since SEW>XLEN.
8611 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8612 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8613 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8614 }
8615
8616 switch (IntNo) {
8617 case Intrinsic::riscv_vslide1up:
8618 case Intrinsic::riscv_vslide1down:
8619 case Intrinsic::riscv_vslide1up_mask:
8620 case Intrinsic::riscv_vslide1down_mask: {
8621 // We need to special case these when the scalar is larger than XLen.
8622 unsigned NumOps = Op.getNumOperands();
8623 bool IsMasked = NumOps == 7;
8624
8625 // Convert the vector source to the equivalent nxvXi32 vector.
8626 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8627 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8628 SDValue ScalarLo, ScalarHi;
8629 std::tie(ScalarLo, ScalarHi) =
8630 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8631
8632 // Double the VL since we halved SEW.
8633 SDValue AVL = getVLOperand(Op);
8634 SDValue I32VL;
8635
8636 // Optimize for constant AVL
8637 if (isa<ConstantSDNode>(AVL)) {
8638 const auto [MinVLMAX, MaxVLMAX] =
8639 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8640
8641 uint64_t AVLInt = AVL->getAsZExtVal();
8642 if (AVLInt <= MinVLMAX) {
8643 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8644 } else if (AVLInt >= 2 * MaxVLMAX) {
8645 // Just set vl to VLMAX in this situation
8646 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8647 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8648 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8649 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8650 SDValue SETVLMAX = DAG.getTargetConstant(
8651 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8652 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8653 LMUL);
8654 } else {
8655 // For an AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8656 // depends on the hardware implementation,
8657 // so let the following code handle it.
8658 }
8659 }
8660 if (!I32VL) {
8661 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8662 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8663 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8664 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8665 SDValue SETVL =
8666 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8667 // Use the vsetvli instruction to get the actually-used length, which is
8668 // related to the hardware implementation.
8669 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8670 SEW, LMUL);
8671 I32VL =
8672 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8673 }
8674
8675 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8676
8677 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8678 // instructions.
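// Illustrative sketch (assumed): for a 64-bit scalar on RV32, the source is
// reinterpreted as an i32 vector, VL is doubled, and the hi then lo halves
// are shifted in with two SEW=32 vslide1up steps (lo then hi for
// vslide1down), so the pair lands as one 64-bit element.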
8679 SDValue Passthru;
8680 if (IsMasked)
8681 Passthru = DAG.getUNDEF(I32VT);
8682 else
8683 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8684
8685 if (IntNo == Intrinsic::riscv_vslide1up ||
8686 IntNo == Intrinsic::riscv_vslide1up_mask) {
8687 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8688 ScalarHi, I32Mask, I32VL);
8689 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8690 ScalarLo, I32Mask, I32VL);
8691 } else {
8692 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8693 ScalarLo, I32Mask, I32VL);
8694 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8695 ScalarHi, I32Mask, I32VL);
8696 }
8697
8698 // Convert back to nxvXi64.
8699 Vec = DAG.getBitcast(VT, Vec);
8700
8701 if (!IsMasked)
8702 return Vec;
8703 // Apply mask after the operation.
8704 SDValue Mask = Operands[NumOps - 3];
8705 SDValue MaskedOff = Operands[1];
8706 // Assume Policy operand is the last operand.
8707 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8708 // We don't need to select maskedoff if it's undef.
8709 if (MaskedOff.isUndef())
8710 return Vec;
8711 // TAMU
8712 if (Policy == RISCVII::TAIL_AGNOSTIC)
8713 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8714 DAG.getUNDEF(VT), AVL);
8715 // TUMA or TUMU: Currently we always emit the tumu policy regardless of tuma.
8716 // It's fine because vmerge does not care about the mask policy.
8717 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8718 MaskedOff, AVL);
8719 }
8720 }
8721
8722 // We need to convert the scalar to a splat vector.
8723 SDValue VL = getVLOperand(Op);
8724 assert(VL.getValueType() == XLenVT);
8725 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8726 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8727}
8728
8729// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8730// scalable vector llvm.get.vector.length for now.
8731//
8732// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8733// (vscale * VF). The vscale and VF are independent of element width. We use
8734// SEW=8 for the vsetvli because it is the only element width that supports all
8735 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8736 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8737// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8738// SEW and LMUL are better for the surrounding vector instructions.
8739 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8740 const RISCVSubtarget &Subtarget) {
8741 MVT XLenVT = Subtarget.getXLenVT();
8742
8743 // The smallest LMUL is only valid for the smallest element width.
8744 const unsigned ElementWidth = 8;
8745
8746 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8747 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8748 // We don't support VF==1 with ELEN==32.
8749 [[maybe_unused]] unsigned MinVF =
8750 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8751
8752 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8753 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8754 "Unexpected VF");
8755
8756 bool Fractional = VF < LMul1VF;
8757 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8758 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8759 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
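// Worked example (assuming RVVBitsPerBlock == 64): for VF == 4 and
// ElementWidth == 8, LMul1VF = 64 / 8 = 8, so the LMUL is fractional (1/2).
// A vsetvli with e8/mf2 then yields VLMAX = (VLEN/8) / 2 = vscale * 4 =
// vscale * VF, as required.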
8760
8761 SDLoc DL(N);
8762
8763 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8764 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8765
8766 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8767
8768 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8769 SDValue Res =
8770 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8771 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8772}
8773
8774 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8775 const RISCVSubtarget &Subtarget) {
8776 SDValue Op0 = N->getOperand(1);
8777 MVT OpVT = Op0.getSimpleValueType();
8778 MVT ContainerVT = OpVT;
8779 if (OpVT.isFixedLengthVector()) {
8780 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8781 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8782 }
8783 MVT XLenVT = Subtarget.getXLenVT();
8784 SDLoc DL(N);
8785 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8786 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8787 if (isOneConstant(N->getOperand(2)))
8788 return Res;
8789
8790 // Convert -1 to VL.
8791 SDValue Setcc =
8792 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8793 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8794 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8795}
8796
8797static inline void promoteVCIXScalar(const SDValue &Op,
8798 SmallVectorImpl<SDValue> &Operands,
8799 SelectionDAG &DAG) {
8800 const RISCVSubtarget &Subtarget =
8801 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8802
8803 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8804 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8805 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8806 SDLoc DL(Op);
8807
8808 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8809 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8810 if (!II || !II->hasScalarOperand())
8811 return;
8812
8813 unsigned SplatOp = II->ScalarOperand + 1;
8814 assert(SplatOp < Op.getNumOperands());
8815
8816 SDValue &ScalarOp = Operands[SplatOp];
8817 MVT OpVT = ScalarOp.getSimpleValueType();
8818 MVT XLenVT = Subtarget.getXLenVT();
8819
8820 // The code below is partially copied from lowerVectorIntrinsicScalars.
8821 // If this isn't a scalar, or its type is XLenVT we're done.
8822 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8823 return;
8824
8825 // Manually emit the promotion for the scalar operand.
8826 if (OpVT.bitsLT(XLenVT)) {
8827 unsigned ExtOpc =
8828 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8829 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8830 }
8831
8832 return;
8833}
8834
8835static void processVCIXOperands(SDValue &OrigOp,
8836 SmallVectorImpl<SDValue> &Operands,
8837 SelectionDAG &DAG) {
8838 promoteVCIXScalar(OrigOp, Operands, DAG);
8839 const RISCVSubtarget &Subtarget =
8840 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8841 for (SDValue &V : Operands) {
8842 EVT ValType = V.getValueType();
8843 if (ValType.isVector() && ValType.isFloatingPoint()) {
8844 MVT InterimIVT =
8845 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8846 ValType.getVectorElementCount());
8847 V = DAG.getBitcast(InterimIVT, V);
8848 }
8849 if (ValType.isFixedLengthVector()) {
8850 MVT OpContainerVT = getContainerForFixedLengthVector(
8851 DAG, V.getSimpleValueType(), Subtarget);
8852 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8853 }
8854 }
8855}
8856
8857// LMUL * VLEN should be greater than or equal to EGS * SEW
8858static inline bool isValidEGW(int EGS, EVT VT,
8859 const RISCVSubtarget &Subtarget) {
8860 return (Subtarget.getRealMinVLen() *
8861 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8862 EGS * VT.getScalarSizeInBits();
8863}
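// Worked example (assumed numbers): for EGS = 4 and VT = nxv4i32 with a
// minimum VLEN of 128, (128 * 128) / 64 = 256, i.e. LMUL * VLEN = 2 * 128,
// which is >= 4 * 32, so the element-group-width requirement is met.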
8864
8865SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8866 SelectionDAG &DAG) const {
8867 unsigned IntNo = Op.getConstantOperandVal(0);
8868 SDLoc DL(Op);
8869 MVT XLenVT = Subtarget.getXLenVT();
8870
8871 switch (IntNo) {
8872 default:
8873 break; // Don't custom lower most intrinsics.
8874 case Intrinsic::thread_pointer: {
8875 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8876 return DAG.getRegister(RISCV::X4, PtrVT);
8877 }
8878 case Intrinsic::riscv_orc_b:
8879 case Intrinsic::riscv_brev8:
8880 case Intrinsic::riscv_sha256sig0:
8881 case Intrinsic::riscv_sha256sig1:
8882 case Intrinsic::riscv_sha256sum0:
8883 case Intrinsic::riscv_sha256sum1:
8884 case Intrinsic::riscv_sm3p0:
8885 case Intrinsic::riscv_sm3p1: {
8886 unsigned Opc;
8887 switch (IntNo) {
8888 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8889 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8890 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8891 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8892 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8893 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8894 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8895 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8896 }
8897
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8902 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8903 }
8904
8905 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8906 }
8907 case Intrinsic::riscv_sm4ks:
8908 case Intrinsic::riscv_sm4ed: {
8909 unsigned Opc =
8910 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8911
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res =
8918 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8919 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8920 }
8921
8922 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8923 Op.getOperand(3));
8924 }
8925 case Intrinsic::riscv_zip:
8926 case Intrinsic::riscv_unzip: {
8927 unsigned Opc =
8928 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8929 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8930 }
8931 case Intrinsic::riscv_mopr: {
8932 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8933 SDValue NewOp =
8934 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8935 SDValue Res = DAG.getNode(
8936 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8937 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8941 Op.getOperand(2));
8942 }
8943
8944 case Intrinsic::riscv_moprr: {
8945 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8946 SDValue NewOp0 =
8947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8948 SDValue NewOp1 =
8949 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8950 SDValue Res = DAG.getNode(
8951 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8952 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8953 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8954 }
8955 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8956 Op.getOperand(2), Op.getOperand(3));
8957 }
8958 case Intrinsic::riscv_clmul:
8959 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8960 SDValue NewOp0 =
8961 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8962 SDValue NewOp1 =
8963 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8964 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8965 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8966 }
8967 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8968 Op.getOperand(2));
8969 case Intrinsic::riscv_clmulh:
8970 case Intrinsic::riscv_clmulr: {
8971 unsigned Opc =
8972 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8973 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8974 SDValue NewOp0 =
8975 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8976 SDValue NewOp1 =
8977 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8978 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8979 DAG.getConstant(32, DL, MVT::i64));
8980 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8981 DAG.getConstant(32, DL, MVT::i64));
8982 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8983 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8984 DAG.getConstant(32, DL, MVT::i64));
8985 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8986 }
8987
8988 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8989 }
8990 case Intrinsic::experimental_get_vector_length:
8991 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8992 case Intrinsic::experimental_cttz_elts:
8993 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8994 case Intrinsic::riscv_vmv_x_s: {
8995 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8996 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8997 }
8998 case Intrinsic::riscv_vfmv_f_s:
8999 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9000 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9001 case Intrinsic::riscv_vmv_v_x:
9002 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9003 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9004 Subtarget);
9005 case Intrinsic::riscv_vfmv_v_f:
9006 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9007 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9008 case Intrinsic::riscv_vmv_s_x: {
9009 SDValue Scalar = Op.getOperand(2);
9010
9011 if (Scalar.getValueType().bitsLE(XLenVT)) {
9012 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9013 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9014 Op.getOperand(1), Scalar, Op.getOperand(3));
9015 }
9016
9017 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9018
9019 // This is an i64 value that lives in two scalar registers. We have to
9020 // insert this in a convoluted way. First we build vXi64 splat containing
9021 // the two values that we assemble using some bit math. Next we'll use
9022 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9023 // to merge element 0 from our splat into the source vector.
9024 // FIXME: This is probably not the best way to do this, but it is
9025 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9026 // point.
9027 // sw lo, (a0)
9028 // sw hi, 4(a0)
9029 // vlse vX, (a0)
9030 //
9031 // vid.v vVid
9032 // vmseq.vx mMask, vVid, 0
9033 // vmerge.vvm vDest, vSrc, vVal, mMask
9034 MVT VT = Op.getSimpleValueType();
9035 SDValue Vec = Op.getOperand(1);
9036 SDValue VL = getVLOperand(Op);
9037
9038 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9039 if (Op.getOperand(1).isUndef())
9040 return SplattedVal;
9041 SDValue SplattedIdx =
9042 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9043 DAG.getConstant(0, DL, MVT::i32), VL);
9044
9045 MVT MaskVT = getMaskTypeFor(VT);
9046 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9047 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9048 SDValue SelectCond =
9049 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9050 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9051 DAG.getUNDEF(MaskVT), Mask, VL});
9052 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9053 Vec, DAG.getUNDEF(VT), VL);
9054 }
9055 case Intrinsic::riscv_vfmv_s_f:
9056 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9057 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9058 // EGS * EEW >= 128 bits
9059 case Intrinsic::riscv_vaesdf_vv:
9060 case Intrinsic::riscv_vaesdf_vs:
9061 case Intrinsic::riscv_vaesdm_vv:
9062 case Intrinsic::riscv_vaesdm_vs:
9063 case Intrinsic::riscv_vaesef_vv:
9064 case Intrinsic::riscv_vaesef_vs:
9065 case Intrinsic::riscv_vaesem_vv:
9066 case Intrinsic::riscv_vaesem_vs:
9067 case Intrinsic::riscv_vaeskf1:
9068 case Intrinsic::riscv_vaeskf2:
9069 case Intrinsic::riscv_vaesz_vs:
9070 case Intrinsic::riscv_vsm4k:
9071 case Intrinsic::riscv_vsm4r_vv:
9072 case Intrinsic::riscv_vsm4r_vs: {
9073 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9074 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9075 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9076 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9077 return Op;
9078 }
9079 // EGS * EEW >= 256 bits
9080 case Intrinsic::riscv_vsm3c:
9081 case Intrinsic::riscv_vsm3me: {
9082 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9083 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9084 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9085 return Op;
9086 }
9087 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9088 case Intrinsic::riscv_vsha2ch:
9089 case Intrinsic::riscv_vsha2cl:
9090 case Intrinsic::riscv_vsha2ms: {
9091 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9092 !Subtarget.hasStdExtZvknhb())
9093 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9094 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9095 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9096 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9097 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9098 return Op;
9099 }
9100 case Intrinsic::riscv_sf_vc_v_x:
9101 case Intrinsic::riscv_sf_vc_v_i:
9102 case Intrinsic::riscv_sf_vc_v_xv:
9103 case Intrinsic::riscv_sf_vc_v_iv:
9104 case Intrinsic::riscv_sf_vc_v_vv:
9105 case Intrinsic::riscv_sf_vc_v_fv:
9106 case Intrinsic::riscv_sf_vc_v_xvv:
9107 case Intrinsic::riscv_sf_vc_v_ivv:
9108 case Intrinsic::riscv_sf_vc_v_vvv:
9109 case Intrinsic::riscv_sf_vc_v_fvv:
9110 case Intrinsic::riscv_sf_vc_v_xvw:
9111 case Intrinsic::riscv_sf_vc_v_ivw:
9112 case Intrinsic::riscv_sf_vc_v_vvw:
9113 case Intrinsic::riscv_sf_vc_v_fvw: {
9114 MVT VT = Op.getSimpleValueType();
9115
9116 SmallVector<SDValue> Operands{Op->op_values()};
9117 processVCIXOperands(Op, Operands, DAG);
9118
9119 MVT RetVT = VT;
9120 if (VT.isFixedLengthVector())
9121 RetVT = getContainerForFixedLengthVector(VT);
9122 else if (VT.isFloatingPoint())
9123 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9124 VT.getVectorElementCount());
9125
9126 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9127
9128 if (VT.isFixedLengthVector())
9129 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9130 else if (VT.isFloatingPoint())
9131 NewNode = DAG.getBitcast(VT, NewNode);
9132
9133 if (Op == NewNode)
9134 break;
9135
9136 return NewNode;
9137 }
9138 }
9139
9140 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9141}
9142
9143 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9144 unsigned Type) {
9145 SDLoc DL(Op);
9146 SmallVector<SDValue> Operands{Op->op_values()};
9147 Operands.erase(Operands.begin() + 1);
9148
9149 const RISCVSubtarget &Subtarget =
9150 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9151 MVT VT = Op.getSimpleValueType();
9152 MVT RetVT = VT;
9153 MVT FloatVT = VT;
9154
9155 if (VT.isFloatingPoint()) {
9156 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9157 VT.getVectorElementCount());
9158 FloatVT = RetVT;
9159 }
9160 if (VT.isFixedLengthVector())
9161 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9162 Subtarget);
9163
9164 processVCIXOperands(Op, Operands, DAG);
9165
9166 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9167 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9168 SDValue Chain = NewNode.getValue(1);
9169
9170 if (VT.isFixedLengthVector())
9171 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9172 if (VT.isFloatingPoint())
9173 NewNode = DAG.getBitcast(VT, NewNode);
9174
9175 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9176
9177 return NewNode;
9178}
9179
9180 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9181 unsigned Type) {
9182 SmallVector<SDValue> Operands{Op->op_values()};
9183 Operands.erase(Operands.begin() + 1);
9184 processVCIXOperands(Op, Operands, DAG);
9185
9186 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9187}
9188
9189SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9190 SelectionDAG &DAG) const {
9191 unsigned IntNo = Op.getConstantOperandVal(1);
9192 switch (IntNo) {
9193 default:
9194 break;
9195 case Intrinsic::riscv_masked_strided_load: {
9196 SDLoc DL(Op);
9197 MVT XLenVT = Subtarget.getXLenVT();
9198
9199 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9200 // the selection of the masked intrinsics doesn't do this for us.
9201 SDValue Mask = Op.getOperand(5);
9202 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9203
9204 MVT VT = Op->getSimpleValueType(0);
9205 MVT ContainerVT = VT;
9206 if (VT.isFixedLengthVector())
9207 ContainerVT = getContainerForFixedLengthVector(VT);
9208
9209 SDValue PassThru = Op.getOperand(2);
9210 if (!IsUnmasked) {
9211 MVT MaskVT = getMaskTypeFor(ContainerVT);
9212 if (VT.isFixedLengthVector()) {
9213 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9214 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9215 }
9216 }
9217
9218 auto *Load = cast<MemIntrinsicSDNode>(Op);
9219 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9220 SDValue Ptr = Op.getOperand(3);
9221 SDValue Stride = Op.getOperand(4);
9222 SDValue Result, Chain;
9223
9224 // TODO: We restrict this to unmasked loads currently in consideration of
9225 // the complexity of handling all-false masks.
9226 MVT ScalarVT = ContainerVT.getVectorElementType();
9227 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9228 SDValue ScalarLoad =
9229 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9230 ScalarVT, Load->getMemOperand());
9231 Chain = ScalarLoad.getValue(1);
9232 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9233 Subtarget);
9234 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9235 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9236 Load->getMemOperand());
9237 Chain = ScalarLoad.getValue(1);
9238 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9239 } else {
9240 SDValue IntID = DAG.getTargetConstant(
9241 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9242 XLenVT);
9243
9244 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9245 if (IsUnmasked)
9246 Ops.push_back(DAG.getUNDEF(ContainerVT));
9247 else
9248 Ops.push_back(PassThru);
9249 Ops.push_back(Ptr);
9250 Ops.push_back(Stride);
9251 if (!IsUnmasked)
9252 Ops.push_back(Mask);
9253 Ops.push_back(VL);
9254 if (!IsUnmasked) {
9255 SDValue Policy =
9256 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9257 Ops.push_back(Policy);
9258 }
9259
9260 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9261 Result =
9262 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9263 Load->getMemoryVT(), Load->getMemOperand());
9264 Chain = Result.getValue(1);
9265 }
9266 if (VT.isFixedLengthVector())
9267 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9268 return DAG.getMergeValues({Result, Chain}, DL);
9269 }
9270 case Intrinsic::riscv_seg2_load:
9271 case Intrinsic::riscv_seg3_load:
9272 case Intrinsic::riscv_seg4_load:
9273 case Intrinsic::riscv_seg5_load:
9274 case Intrinsic::riscv_seg6_load:
9275 case Intrinsic::riscv_seg7_load:
9276 case Intrinsic::riscv_seg8_load: {
9277 SDLoc DL(Op);
9278 static const Intrinsic::ID VlsegInts[7] = {
9279 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9280 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9281 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9282 Intrinsic::riscv_vlseg8};
9283 unsigned NF = Op->getNumValues() - 1;
9284 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9285 MVT XLenVT = Subtarget.getXLenVT();
9286 MVT VT = Op->getSimpleValueType(0);
9287 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9288
9289 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9290 Subtarget);
9291 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9292 auto *Load = cast<MemIntrinsicSDNode>(Op);
9293 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9294 ContainerVTs.push_back(MVT::Other);
9295 SDVTList VTs = DAG.getVTList(ContainerVTs);
9296 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9297 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9298 Ops.push_back(Op.getOperand(2));
9299 Ops.push_back(VL);
9300 SDValue Result =
9301 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9302 Load->getMemoryVT(), Load->getMemOperand());
9303 SmallVector<SDValue, 9> Results;
9304 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9305 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9306 DAG, Subtarget));
9307 Results.push_back(Result.getValue(NF));
9308 return DAG.getMergeValues(Results, DL);
9309 }
9310 case Intrinsic::riscv_sf_vc_v_x_se:
9311 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9312 case Intrinsic::riscv_sf_vc_v_i_se:
9313 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9314 case Intrinsic::riscv_sf_vc_v_xv_se:
9315 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9316 case Intrinsic::riscv_sf_vc_v_iv_se:
9317 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9318 case Intrinsic::riscv_sf_vc_v_vv_se:
9319 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9320 case Intrinsic::riscv_sf_vc_v_fv_se:
9321 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9322 case Intrinsic::riscv_sf_vc_v_xvv_se:
9323 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9324 case Intrinsic::riscv_sf_vc_v_ivv_se:
9325 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9326 case Intrinsic::riscv_sf_vc_v_vvv_se:
9327 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9328 case Intrinsic::riscv_sf_vc_v_fvv_se:
9329 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9330 case Intrinsic::riscv_sf_vc_v_xvw_se:
9331 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9332 case Intrinsic::riscv_sf_vc_v_ivw_se:
9333 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9334 case Intrinsic::riscv_sf_vc_v_vvw_se:
9335 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9336 case Intrinsic::riscv_sf_vc_v_fvw_se:
9337 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9338 }
9339
9340 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9341}
9342
9343SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9344 SelectionDAG &DAG) const {
9345 unsigned IntNo = Op.getConstantOperandVal(1);
9346 switch (IntNo) {
9347 default:
9348 break;
9349 case Intrinsic::riscv_masked_strided_store: {
9350 SDLoc DL(Op);
9351 MVT XLenVT = Subtarget.getXLenVT();
9352
9353 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9354 // the selection of the masked intrinsics doesn't do this for us.
9355 SDValue Mask = Op.getOperand(5);
9356 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9357
9358 SDValue Val = Op.getOperand(2);
9359 MVT VT = Val.getSimpleValueType();
9360 MVT ContainerVT = VT;
9361 if (VT.isFixedLengthVector()) {
9362 ContainerVT = getContainerForFixedLengthVector(VT);
9363 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9364 }
9365 if (!IsUnmasked) {
9366 MVT MaskVT = getMaskTypeFor(ContainerVT);
9367 if (VT.isFixedLengthVector())
9368 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9369 }
9370
9371 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9372
9373 SDValue IntID = DAG.getTargetConstant(
9374 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9375 XLenVT);
9376
9377 auto *Store = cast<MemIntrinsicSDNode>(Op);
9378 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9379 Ops.push_back(Val);
9380 Ops.push_back(Op.getOperand(3)); // Ptr
9381 Ops.push_back(Op.getOperand(4)); // Stride
9382 if (!IsUnmasked)
9383 Ops.push_back(Mask);
9384 Ops.push_back(VL);
9385
9386 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9387 Ops, Store->getMemoryVT(),
9388 Store->getMemOperand());
9389 }
9390 case Intrinsic::riscv_seg2_store:
9391 case Intrinsic::riscv_seg3_store:
9392 case Intrinsic::riscv_seg4_store:
9393 case Intrinsic::riscv_seg5_store:
9394 case Intrinsic::riscv_seg6_store:
9395 case Intrinsic::riscv_seg7_store:
9396 case Intrinsic::riscv_seg8_store: {
9397 SDLoc DL(Op);
9398 static const Intrinsic::ID VssegInts[] = {
9399 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9400 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9401 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9402 Intrinsic::riscv_vsseg8};
9403 // Operands are (chain, int_id, vec*, ptr, vl)
9404 unsigned NF = Op->getNumOperands() - 4;
9405 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9406 MVT XLenVT = Subtarget.getXLenVT();
9407 MVT VT = Op->getOperand(2).getSimpleValueType();
9408 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9409
9410 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9411 Subtarget);
9412 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9413 SDValue Ptr = Op->getOperand(NF + 2);
9414
9415 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9416 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9417 for (unsigned i = 0; i < NF; i++)
9418 Ops.push_back(convertToScalableVector(
9419 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9420 Ops.append({Ptr, VL});
9421
9422 return DAG.getMemIntrinsicNode(
9423 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9424 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9425 }
9426 case Intrinsic::riscv_sf_vc_xv_se:
9427 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9428 case Intrinsic::riscv_sf_vc_iv_se:
9429 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9430 case Intrinsic::riscv_sf_vc_vv_se:
9431 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9432 case Intrinsic::riscv_sf_vc_fv_se:
9433 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9434 case Intrinsic::riscv_sf_vc_xvv_se:
9435 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9436 case Intrinsic::riscv_sf_vc_ivv_se:
9437 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9438 case Intrinsic::riscv_sf_vc_vvv_se:
9439 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9440 case Intrinsic::riscv_sf_vc_fvv_se:
9441 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9442 case Intrinsic::riscv_sf_vc_xvw_se:
9443 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9444 case Intrinsic::riscv_sf_vc_ivw_se:
9445 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9446 case Intrinsic::riscv_sf_vc_vvw_se:
9447 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9448 case Intrinsic::riscv_sf_vc_fvw_se:
9449 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9450 }
9451
9452 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9453}
9454
9455static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9456 switch (ISDOpcode) {
9457 default:
9458 llvm_unreachable("Unhandled reduction");
9459 case ISD::VP_REDUCE_ADD:
9460 case ISD::VECREDUCE_ADD:
9461 return RISCVISD::VECREDUCE_ADD_VL;
9462 case ISD::VP_REDUCE_UMAX:
9463 case ISD::VECREDUCE_UMAX:
9464 return RISCVISD::VECREDUCE_UMAX_VL;
9465 case ISD::VP_REDUCE_SMAX:
9466 case ISD::VECREDUCE_SMAX:
9467 return RISCVISD::VECREDUCE_SMAX_VL;
9468 case ISD::VP_REDUCE_UMIN:
9469 case ISD::VECREDUCE_UMIN:
9470 return RISCVISD::VECREDUCE_UMIN_VL;
9471 case ISD::VP_REDUCE_SMIN:
9472 case ISD::VECREDUCE_SMIN:
9473 return RISCVISD::VECREDUCE_SMIN_VL;
9474 case ISD::VP_REDUCE_AND:
9475 case ISD::VECREDUCE_AND:
9476 return RISCVISD::VECREDUCE_AND_VL;
9477 case ISD::VP_REDUCE_OR:
9478 case ISD::VECREDUCE_OR:
9479 return RISCVISD::VECREDUCE_OR_VL;
9480 case ISD::VP_REDUCE_XOR:
9481 case ISD::VECREDUCE_XOR:
9482 return RISCVISD::VECREDUCE_XOR_VL;
9483 case ISD::VP_REDUCE_FADD:
9484 return RISCVISD::VECREDUCE_FADD_VL;
9485 case ISD::VP_REDUCE_SEQ_FADD:
9486 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9487 case ISD::VP_REDUCE_FMAX:
9488 return RISCVISD::VECREDUCE_FMAX_VL;
9489 case ISD::VP_REDUCE_FMIN:
9490 return RISCVISD::VECREDUCE_FMIN_VL;
9491 }
9492
9493}
9494
9495SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9496 SelectionDAG &DAG,
9497 bool IsVP) const {
9498 SDLoc DL(Op);
9499 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9500 MVT VecVT = Vec.getSimpleValueType();
9501 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9502 Op.getOpcode() == ISD::VECREDUCE_OR ||
9503 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9504 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9505 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9506 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9507 "Unexpected reduction lowering");
9508
9509 MVT XLenVT = Subtarget.getXLenVT();
9510
9511 MVT ContainerVT = VecVT;
9512 if (VecVT.isFixedLengthVector()) {
9513 ContainerVT = getContainerForFixedLengthVector(VecVT);
9514 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9515 }
9516
9517 SDValue Mask, VL;
9518 if (IsVP) {
9519 Mask = Op.getOperand(2);
9520 VL = Op.getOperand(3);
9521 } else {
9522 std::tie(Mask, VL) =
9523 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9524 }
9525
9526 unsigned BaseOpc;
9527 ISD::CondCode CC;
9528 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9529
9530 switch (Op.getOpcode()) {
9531 default:
9532 llvm_unreachable("Unhandled reduction");
9533 case ISD::VECREDUCE_AND:
9534 case ISD::VP_REDUCE_AND: {
9535 // vcpop ~x == 0
9536 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9537 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9538 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9539 CC = ISD::SETEQ;
9540 BaseOpc = ISD::AND;
9541 break;
9542 }
9543 case ISD::VECREDUCE_OR:
9544 case ISD::VP_REDUCE_OR:
9545 // vcpop x != 0
9546 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9547 CC = ISD::SETNE;
9548 BaseOpc = ISD::OR;
9549 break;
9550 case ISD::VECREDUCE_XOR:
9551 case ISD::VP_REDUCE_XOR: {
9552 // ((vcpop x) & 1) != 0
9553 SDValue One = DAG.getConstant(1, DL, XLenVT);
9554 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9555 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9556 CC = ISD::SETNE;
9557 BaseOpc = ISD::XOR;
9558 break;
9559 }
9560 }
9561
9562 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9563 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9564
9565 if (!IsVP)
9566 return SetCC;
9567
9568 // Now include the start value in the operation.
9569 // Note that we must return the start value when no elements are operated
9570 // upon. The vcpop instructions we've emitted in each case above will return
9571 // 0 for an inactive vector, and so we've already received the neutral value:
9572 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9573 // can simply include the start value.
9574 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9575}
9576
9577static bool isNonZeroAVL(SDValue AVL) {
9578 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9579 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9580 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9581 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9582}
9583
9584/// Helper to lower a reduction sequence of the form:
9585/// scalar = reduce_op vec, scalar_start
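/// Illustrative sketch (assumed, simplified): a vecreduce.add of a v4i32 with
/// start value %s is roughly
///   vmv.s.x    v9, %s        ; start value into element 0 of an LMUL1 temp
///   vredsum.vs v9, v8, v9    ; v9[0] = v8[0] + ... + v8[VL-1] + v9[0]
///   vmv.x.s    a0, v9        ; extract the scalar result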
9586static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9587 SDValue StartValue, SDValue Vec, SDValue Mask,
9588 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9589 const RISCVSubtarget &Subtarget) {
9590 const MVT VecVT = Vec.getSimpleValueType();
9591 const MVT M1VT = getLMUL1VT(VecVT);
9592 const MVT XLenVT = Subtarget.getXLenVT();
9593 const bool NonZeroAVL = isNonZeroAVL(VL);
9594
9595 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9596 // or the original VT if fractional.
9597 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9598 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9599 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9600 // be the result of the reduction operation.
9601 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9602 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9603 DAG, Subtarget);
9604 if (M1VT != InnerVT)
9605 InitialValue =
9606 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9607 InitialValue, DAG.getVectorIdxConstant(0, DL));
9608 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9609 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9610 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9611 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9612 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9613 DAG.getVectorIdxConstant(0, DL));
9614}
9615
9616SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9617 SelectionDAG &DAG) const {
9618 SDLoc DL(Op);
9619 SDValue Vec = Op.getOperand(0);
9620 EVT VecEVT = Vec.getValueType();
9621
9622 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9623
9624 // Due to ordering in legalize types we may have a vector type that needs to
9625 // be split. Do that manually so we can get down to a legal type.
9626 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9627 TargetLowering::TypeSplitVector) {
9628 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9629 VecEVT = Lo.getValueType();
9630 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9631 }
9632
9633 // TODO: The type may need to be widened rather than split. Or widened before
9634 // it can be split.
9635 if (!isTypeLegal(VecEVT))
9636 return SDValue();
9637
9638 MVT VecVT = VecEVT.getSimpleVT();
9639 MVT VecEltVT = VecVT.getVectorElementType();
9640 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9641
9642 MVT ContainerVT = VecVT;
9643 if (VecVT.isFixedLengthVector()) {
9644 ContainerVT = getContainerForFixedLengthVector(VecVT);
9645 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9646 }
9647
9648 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9649
9650 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9651 switch (BaseOpc) {
9652 case ISD::AND:
9653 case ISD::OR:
9654 case ISD::UMAX:
9655 case ISD::UMIN:
9656 case ISD::SMAX:
9657 case ISD::SMIN:
9658 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9659 DAG.getVectorIdxConstant(0, DL));
9660 }
9661 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9662 Mask, VL, DL, DAG, Subtarget);
9663}
9664
9665// Given a reduction op, this function returns the matching reduction opcode,
9666// the vector SDValue and the scalar SDValue required to lower this to a
9667// RISCVISD node.
9668static std::tuple<unsigned, SDValue, SDValue>
9670 const RISCVSubtarget &Subtarget) {
9671 SDLoc DL(Op);
9672 auto Flags = Op->getFlags();
9673 unsigned Opcode = Op.getOpcode();
9674 switch (Opcode) {
9675 default:
9676 llvm_unreachable("Unhandled reduction");
9677 case ISD::VECREDUCE_FADD: {
9678 // Use positive zero if we can. It is cheaper to materialize.
9679 SDValue Zero =
9680 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9681 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9682 }
9683 case ISD::VECREDUCE_SEQ_FADD:
9684 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9685 Op.getOperand(0));
9686 case ISD::VECREDUCE_FMINIMUM:
9687 case ISD::VECREDUCE_FMAXIMUM:
9688 case ISD::VECREDUCE_FMIN:
9689 case ISD::VECREDUCE_FMAX: {
9690 SDValue Front =
9691 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9692 DAG.getVectorIdxConstant(0, DL));
9693 unsigned RVVOpc =
9694 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9695 ? RISCVISD::VECREDUCE_FMIN_VL
9696 : RISCVISD::VECREDUCE_FMAX_VL;
9697 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9698 }
9699 }
9700}
9701
9702SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9703 SelectionDAG &DAG) const {
9704 SDLoc DL(Op);
9705 MVT VecEltVT = Op.getSimpleValueType();
9706
9707 unsigned RVVOpcode;
9708 SDValue VectorVal, ScalarVal;
9709 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9710 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9711 MVT VecVT = VectorVal.getSimpleValueType();
9712
9713 MVT ContainerVT = VecVT;
9714 if (VecVT.isFixedLengthVector()) {
9715 ContainerVT = getContainerForFixedLengthVector(VecVT);
9716 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9717 }
9718
9719 MVT ResVT = Op.getSimpleValueType();
9720 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9721 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9722 VL, DL, DAG, Subtarget);
9723 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9724 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9725 return Res;
9726
9727 if (Op->getFlags().hasNoNaNs())
9728 return Res;
9729
9730 // Force the output to NaN if any element is NaN.
9731 SDValue IsNan =
9732 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9733 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9734 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9735 MVT XLenVT = Subtarget.getXLenVT();
9736 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9737 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9738 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9739 return DAG.getSelect(
9740 DL, ResVT, NoNaNs, Res,
9741 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9742 ResVT));
9743}
9744
9745SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9746 SelectionDAG &DAG) const {
9747 SDLoc DL(Op);
9748 SDValue Vec = Op.getOperand(1);
9749 EVT VecEVT = Vec.getValueType();
9750
9751 // TODO: The type may need to be widened rather than split. Or widened before
9752 // it can be split.
9753 if (!isTypeLegal(VecEVT))
9754 return SDValue();
9755
9756 MVT VecVT = VecEVT.getSimpleVT();
9757 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9758
9759 if (VecVT.isFixedLengthVector()) {
9760 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9761 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9762 }
9763
9764 SDValue VL = Op.getOperand(3);
9765 SDValue Mask = Op.getOperand(2);
9766 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9767 Vec, Mask, VL, DL, DAG, Subtarget);
9768}
9769
9770SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9771 SelectionDAG &DAG) const {
9772 SDValue Vec = Op.getOperand(0);
9773 SDValue SubVec = Op.getOperand(1);
9774 MVT VecVT = Vec.getSimpleValueType();
9775 MVT SubVecVT = SubVec.getSimpleValueType();
9776
9777 SDLoc DL(Op);
9778 MVT XLenVT = Subtarget.getXLenVT();
9779 unsigned OrigIdx = Op.getConstantOperandVal(2);
9780 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9781
9782 // We don't have the ability to slide mask vectors up indexed by their i1
9783 // elements; the smallest we can do is i8. Often we are able to bitcast to
9784 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9785 // into a scalable one, we might not necessarily have enough scalable
9786 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9787 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9788 (OrigIdx != 0 || !Vec.isUndef())) {
9789 if (VecVT.getVectorMinNumElements() >= 8 &&
9790 SubVecVT.getVectorMinNumElements() >= 8) {
9791 assert(OrigIdx % 8 == 0 && "Invalid index");
9792 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9793 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9794 "Unexpected mask vector lowering");
9795 OrigIdx /= 8;
9796 SubVecVT =
9797 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9798 SubVecVT.isScalableVector());
9799 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9800 VecVT.isScalableVector());
9801 Vec = DAG.getBitcast(VecVT, Vec);
9802 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9803 } else {
9804 // We can't slide this mask vector up indexed by its i1 elements.
9805 // This poses a problem when we wish to insert a scalable vector which
9806 // can't be re-expressed as a larger type. Just choose the slow path and
9807 // extend to a larger type, then truncate back down.
9808 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9809 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9810 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9811 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9812 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9813 Op.getOperand(2));
9814 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9815 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9816 }
9817 }
9818
9819 // If the subvector is a fixed-length type and we don't know VLEN
9820 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9821 // don't know which register of a LMUL group contains the specific subvector
9822 // as we only know the minimum register size. Therefore we must slide the
9823 // vector group up the full amount.
9824 const auto VLen = Subtarget.getRealVLen();
9825 if (SubVecVT.isFixedLengthVector() && !VLen) {
9826 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9827 return Op;
9828 MVT ContainerVT = VecVT;
9829 if (VecVT.isFixedLengthVector()) {
9830 ContainerVT = getContainerForFixedLengthVector(VecVT);
9831 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9832 }
9833
9834 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9835 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9836 DAG.getUNDEF(ContainerVT), SubVec,
9837 DAG.getVectorIdxConstant(0, DL));
9838 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9839 return DAG.getBitcast(Op.getValueType(), SubVec);
9840 }
9841
9842 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9843 DAG.getUNDEF(ContainerVT), SubVec,
9844 DAG.getVectorIdxConstant(0, DL));
9845 SDValue Mask =
9846 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9847 // Set the vector length to only the number of elements we care about. Note
9848 // that for slideup this includes the offset.
9849 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9850 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9851
9852 // Use tail agnostic policy if we're inserting over Vec's tail.
9853 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9854 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9855 Policy = RISCVII::TAIL_AGNOSTIC;
9856
9857 // If we're inserting into the lowest elements, use a tail undisturbed
9858 // vmv.v.v.
9859 if (OrigIdx == 0) {
9860 SubVec =
9861 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9862 } else {
9863 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9864 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9865 SlideupAmt, Mask, VL, Policy);
9866 }
9867
9868 if (VecVT.isFixedLengthVector())
9869 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9870 return DAG.getBitcast(Op.getValueType(), SubVec);
9871 }
9872
9873 MVT ContainerVecVT = VecVT;
9874 if (VecVT.isFixedLengthVector()) {
9875 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9876 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9877 }
9878
9879 MVT ContainerSubVecVT = SubVecVT;
9880 if (SubVecVT.isFixedLengthVector()) {
9881 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9882 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9883 }
9884
9885 unsigned SubRegIdx;
9886 ElementCount RemIdx;
9887 // insert_subvector scales the index by vscale if the subvector is scalable,
9888 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9889 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
9890 if (SubVecVT.isFixedLengthVector()) {
9891 assert(VLen);
9892 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9893 auto Decompose =
9894 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9895 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9896 SubRegIdx = Decompose.first;
9897 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9898 (OrigIdx % Vscale));
9899 } else {
9900 auto Decompose =
9901 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9902 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9903 SubRegIdx = Decompose.first;
9904 RemIdx = ElementCount::getScalable(Decompose.second);
9905 }
9906
9907 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9908 assert(isPowerOf2_64(
9909 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9910 bool ExactlyVecRegSized =
9911 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9912 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9913
9914 // 1. If the Idx has been completely eliminated and this subvector's size is
9915 // a vector register or a multiple thereof, or the surrounding elements are
9916 // undef, then this is a subvector insert which naturally aligns to a vector
9917 // register. These can easily be handled using subregister manipulation.
9918 // 2. If the subvector isn't an exact multiple of a valid register group size,
9919 // then the insertion must preserve the undisturbed elements of the register.
9920 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9921 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9922 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9923 // of that LMUL=1 type back into the larger vector (resolving to another
9924 // subregister operation). See below for how our VSLIDEUP works. We go via a
9925 // LMUL=1 type to avoid allocating a large register group to hold our
9926 // subvector.
9927 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9928 if (SubVecVT.isFixedLengthVector()) {
9929 // We may get NoSubRegister if inserting at index 0 and the subvec
9930 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
9931 if (SubRegIdx == RISCV::NoSubRegister) {
9932 assert(OrigIdx == 0);
9933 return Op;
9934 }
9935
9936 SDValue Insert =
9937 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
9938 if (VecVT.isFixedLengthVector())
9939 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
9940 return Insert;
9941 }
9942 return Op;
9943 }
9944
9945 // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
9946 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9947 // (in our case undisturbed). This means we can set up a subvector insertion
9948 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9949 // size of the subvector.
9950 MVT InterSubVT = ContainerVecVT;
9951 SDValue AlignedExtract = Vec;
9952 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
9953 if (SubVecVT.isFixedLengthVector())
9954 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
9955 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
9956 InterSubVT = getLMUL1VT(ContainerVecVT);
9957 // Extract a subvector equal to the nearest full vector register type. This
9958 // should resolve to a EXTRACT_SUBREG instruction.
9959 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9960 DAG.getVectorIdxConstant(AlignedIdx, DL));
9961 }
9962
9963 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9964 DAG.getUNDEF(InterSubVT), SubVec,
9965 DAG.getVectorIdxConstant(0, DL));
9966
9967 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
9968
9969 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
9970 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
9971
9972 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9973 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9974 if (Subtarget.expandVScale(EndIndex) ==
9975 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
9976 Policy = RISCVII::TAIL_AGNOSTIC;
9977
9978 // If we're inserting into the lowest elements, use a tail undisturbed
9979 // vmv.v.v.
9980 if (RemIdx.isZero()) {
9981 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9982 SubVec, VL);
9983 } else {
9984 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
9985
9986 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9987 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9988
9989 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9990 SlideupAmt, Mask, VL, Policy);
9991 }
9992
9993 // If required, insert this subvector back into the correct vector register.
9994 // This should resolve to an INSERT_SUBREG instruction.
9995 if (ContainerVecVT.bitsGT(InterSubVT))
9996 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
9997 DAG.getVectorIdxConstant(AlignedIdx, DL));
9998
9999 if (VecVT.isFixedLengthVector())
10000 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10001
10002 // We might have bitcast from a mask type: cast back to the original type if
10003 // required.
10004 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10005}
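// Illustrative sketch (not part of the original file): the vslideup-based
// insert above, modelled on a plain array. With VL = Offset + SubVecLen and a
// tail-undisturbed policy, only elements [Offset, VL) of the destination are
// rewritten; elements below Offset and at or beyond VL are left untouched.
// The names below are assumptions used only for illustration.
static void modelSlideupInsert(int *Vec, unsigned VecLen, const int *SubVec,
                               unsigned SubVecLen, unsigned Offset) {
  unsigned VL = Offset + SubVecLen;   // the AVL chosen by the lowering
  for (unsigned I = Offset; I < VL && I < VecLen; ++I)
    Vec[I] = SubVec[I - Offset];      // body elements come from the subvector
  // Elements [0, Offset) and [VL, VecLen) stay undisturbed.
}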
10006
10007SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10008 SelectionDAG &DAG) const {
10009 SDValue Vec = Op.getOperand(0);
10010 MVT SubVecVT = Op.getSimpleValueType();
10011 MVT VecVT = Vec.getSimpleValueType();
10012
10013 SDLoc DL(Op);
10014 MVT XLenVT = Subtarget.getXLenVT();
10015 unsigned OrigIdx = Op.getConstantOperandVal(1);
10016 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10017
10018 // We don't have the ability to slide mask vectors down indexed by their i1
10019 // elements; the smallest we can do is i8. Often we are able to bitcast to
10020 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10021 // from a scalable one, we might not necessarily have enough scalable
10022 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10023 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10024 if (VecVT.getVectorMinNumElements() >= 8 &&
10025 SubVecVT.getVectorMinNumElements() >= 8) {
10026 assert(OrigIdx % 8 == 0 && "Invalid index");
10027 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10028 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10029 "Unexpected mask vector lowering");
10030 OrigIdx /= 8;
10031 SubVecVT =
10032 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10033 SubVecVT.isScalableVector());
10034 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10035 VecVT.isScalableVector());
10036 Vec = DAG.getBitcast(VecVT, Vec);
10037 } else {
10038 // We can't slide this mask vector down, indexed by its i1 elements.
10039 // This poses a problem when we wish to extract a scalable vector which
10040 // can't be re-expressed as a larger type. Just choose the slow path and
10041 // extend to a larger type, then truncate back down.
10042 // TODO: We could probably improve this when extracting certain fixed
10043 // from fixed, where we can extract as i8 and shift the correct element
10044 // right to reach the desired subvector?
10045 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10046 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10047 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10048 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10049 Op.getOperand(1));
10050 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10051 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10052 }
10053 }
10054
10055 // With an index of 0 this is a cast-like subvector, which can be performed
10056 // with subregister operations.
10057 if (OrigIdx == 0)
10058 return Op;
10059
10060 const auto VLen = Subtarget.getRealVLen();
10061
10062 // If the subvector is a fixed-length type and we don't know VLEN
10063 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10064 // don't know which register of a LMUL group contains the specific subvector
10065 // as we only know the minimum register size. Therefore we must slide the
10066 // vector group down the full amount.
10067 if (SubVecVT.isFixedLengthVector() && !VLen) {
10068 MVT ContainerVT = VecVT;
10069 if (VecVT.isFixedLengthVector()) {
10070 ContainerVT = getContainerForFixedLengthVector(VecVT);
10071 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10072 }
10073
10074 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10075 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10076 if (auto ShrunkVT =
10077 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10078 ContainerVT = *ShrunkVT;
10079 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10080 DAG.getVectorIdxConstant(0, DL));
10081 }
10082
10083 SDValue Mask =
10084 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10085 // Set the vector length to only the number of elements we care about. This
10086 // avoids sliding down elements we're going to discard straight away.
10087 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10088 Subtarget);
10089 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10090 SDValue Slidedown =
10091 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10092 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10093 // Now we can use a cast-like subvector extract to get the result.
10094 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10095 DAG.getVectorIdxConstant(0, DL));
10096 return DAG.getBitcast(Op.getValueType(), Slidedown);
10097 }
10098
10099 if (VecVT.isFixedLengthVector()) {
10100 VecVT = getContainerForFixedLengthVector(VecVT);
10101 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10102 }
10103
10104 MVT ContainerSubVecVT = SubVecVT;
10105 if (SubVecVT.isFixedLengthVector())
10106 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10107
10108 unsigned SubRegIdx;
10109 ElementCount RemIdx;
10110 // extract_subvector scales the index by vscale if the subvector is scalable,
10111 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10112 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10113 if (SubVecVT.isFixedLengthVector()) {
10114 assert(VLen);
10115 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10116 auto Decompose =
10117 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10118 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10119 SubRegIdx = Decompose.first;
10120 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10121 (OrigIdx % Vscale));
10122 } else {
10123 auto Decompose =
10124 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10125 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10126 SubRegIdx = Decompose.first;
10127 RemIdx = ElementCount::getScalable(Decompose.second);
10128 }
10129
10130 // If the Idx has been completely eliminated then this is a subvector extract
10131 // which naturally aligns to a vector register. These can easily be handled
10132 // using subregister manipulation.
10133 if (RemIdx.isZero()) {
10134 if (SubVecVT.isFixedLengthVector()) {
10135 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10136 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10137 }
10138 return Op;
10139 }
10140
10141 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10142 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10143 // divide exactly.
10144 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10145 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10146
10147 // If the vector type is an LMUL-group type, extract a subvector equal to the
10148 // nearest full vector register type.
10149 MVT InterSubVT = VecVT;
10150 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10151 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10152 // we should have successfully decomposed the extract into a subregister.
10153 assert(SubRegIdx != RISCV::NoSubRegister);
10154 InterSubVT = getLMUL1VT(VecVT);
10155 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10156 }
10157
10158 // Slide this vector register down by the desired number of elements in order
10159 // to place the desired subvector starting at element 0.
10160 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10161 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10162 if (SubVecVT.isFixedLengthVector())
10163 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10164 Subtarget);
10165 SDValue Slidedown =
10166 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10167 Vec, SlidedownAmt, Mask, VL);
10168
10169 // Now the vector is in the right position, extract our final subvector. This
10170 // should resolve to a COPY.
10171 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10172 DAG.getVectorIdxConstant(0, DL));
10173
10174 // We might have bitcast from a mask type: cast back to the original type if
10175 // required.
10176 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10177}
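// Illustrative sketch (not part of the original file): the slidedown-based
// extract above, modelled on a plain array. Sliding the source down by
// OrigIdx moves the desired elements to position 0, after which the final
// EXTRACT_SUBVECTOR at index 0 is a plain subregister copy. The names below
// are assumptions used only for illustration.
static void modelSlidedownExtract(const int *Vec, int *SubVec,
                                  unsigned SubVecLen, unsigned OrigIdx) {
  // VL is set to SubVecLen so no more elements than necessary are slid.
  for (unsigned I = 0; I < SubVecLen; ++I)
    SubVec[I] = Vec[OrigIdx + I];
}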
10178
10179// Widen a vector's operands to i8, then truncate its results back to the
10180// original type, typically i1. All operand and result types must be the same.
10181 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10182 SelectionDAG &DAG) {
10183 MVT VT = N.getSimpleValueType();
10184 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10185 SmallVector<SDValue, 4> WideOps;
10186 for (SDValue Op : N->ops()) {
10187 assert(Op.getSimpleValueType() == VT &&
10188 "Operands and result must be same type");
10189 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10190 }
10191
10192 unsigned NumVals = N->getNumValues();
10193
10194 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10195 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10196 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10197 SmallVector<SDValue, 4> TruncVals;
10198 for (unsigned I = 0; I < NumVals; I++) {
10199 TruncVals.push_back(
10200 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10201 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10202 }
10203
10204 if (TruncVals.size() > 1)
10205 return DAG.getMergeValues(TruncVals, DL);
10206 return TruncVals.front();
10207}
10208
10209SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10210 SelectionDAG &DAG) const {
10211 SDLoc DL(Op);
10212 MVT VecVT = Op.getSimpleValueType();
10213
10214 assert(VecVT.isScalableVector() &&
10215 "vector_interleave on non-scalable vector!");
10216
10217 // 1 bit element vectors need to be widened to e8
10218 if (VecVT.getVectorElementType() == MVT::i1)
10219 return widenVectorOpsToi8(Op, DL, DAG);
10220
10221 // If the VT is LMUL=8, we need to split and reassemble.
10222 if (VecVT.getSizeInBits().getKnownMinValue() ==
10223 (8 * RISCV::RVVBitsPerBlock)) {
10224 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10225 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10226 EVT SplitVT = Op0Lo.getValueType();
10227
10228 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10229 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10230 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10231 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10232
10233 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10234 ResLo.getValue(0), ResHi.getValue(0));
10235 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10236 ResHi.getValue(1));
10237 return DAG.getMergeValues({Even, Odd}, DL);
10238 }
10239
10240 // Concatenate the two vectors as one vector to deinterleave
10241 MVT ConcatVT =
10242 MVT::getVectorVT(VecVT.getVectorElementType(),
10243 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10244 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10245 Op.getOperand(0), Op.getOperand(1));
10246
10247 // We want to operate on all lanes, so get the mask and VL and mask for it
10248 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10249 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10250
10251 // We can deinterleave through vnsrl.wi if the element type is smaller than
10252 // ELEN
10253 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10254 SDValue Even =
10255 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10256 SDValue Odd =
10257 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10258 return DAG.getMergeValues({Even, Odd}, DL);
10259 }
10260
10261 // For the indices, use the same SEW to avoid an extra vsetvli
10262 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10263 // Create a vector of even indices {0, 2, 4, ...}
10264 SDValue EvenIdx =
10265 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10266 // Create a vector of odd indices {1, 3, 5, ... }
10267 SDValue OddIdx =
10268 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10269
10270 // Gather the even and odd elements into two separate vectors
10271 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10272 Concat, EvenIdx, Passthru, Mask, VL);
10273 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10274 Concat, OddIdx, Passthru, Mask, VL);
10275
10276 // Extract the result half of the gather for even and odd
10277 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10278 DAG.getVectorIdxConstant(0, DL));
10279 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10280 DAG.getVectorIdxConstant(0, DL));
10281
10282 return DAG.getMergeValues({Even, Odd}, DL);
10283}
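// Illustrative sketch (not part of the original file): what the deinterleave
// above computes, written over plain arrays. The even result takes the even
// lanes of the concatenated input and the odd result takes the odd lanes; the
// vnsrl path reaches the same result by viewing each pair of SEW-bit lanes as
// one 2*SEW-bit lane and shifting right by 0 or by SEW. Names are assumptions
// used only for illustration.
static void modelDeinterleave(const int *Concat, unsigned Len, int *Even,
                              int *Odd) {
  for (unsigned I = 0; 2 * I + 1 < Len; ++I) {
    Even[I] = Concat[2 * I];      // vnsrl by 0 keeps the even lane
    Odd[I] = Concat[2 * I + 1];   // vnsrl by SEW keeps the odd lane
  }
}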
10284
10285SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10286 SelectionDAG &DAG) const {
10287 SDLoc DL(Op);
10288 MVT VecVT = Op.getSimpleValueType();
10289
10290 assert(VecVT.isScalableVector() &&
10291 "vector_interleave on non-scalable vector!");
10292
10293 // i1 vectors need to be widened to i8
10294 if (VecVT.getVectorElementType() == MVT::i1)
10295 return widenVectorOpsToi8(Op, DL, DAG);
10296
10297 MVT XLenVT = Subtarget.getXLenVT();
10298 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10299
10300 // If the VT is LMUL=8, we need to split and reassemble.
10301 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10302 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10303 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10304 EVT SplitVT = Op0Lo.getValueType();
10305
10306 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10307 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10308 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10309 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10310
10311 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10312 ResLo.getValue(0), ResLo.getValue(1));
10313 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10314 ResHi.getValue(0), ResHi.getValue(1));
10315 return DAG.getMergeValues({Lo, Hi}, DL);
10316 }
10317
10318 SDValue Interleaved;
10319
10320 // If the element type is smaller than ELEN, then we can interleave with
10321 // vwaddu.vv and vwmaccu.vx
10322 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10323 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10324 DAG, Subtarget);
10325 } else {
10326 // Otherwise, fall back to using vrgatherei16.vv
10327 MVT ConcatVT =
10328 MVT::getVectorVT(VecVT.getVectorElementType(),
10329 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10330 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10331 Op.getOperand(0), Op.getOperand(1));
10332
10333 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10334
10335 // 0 1 2 3 4 5 6 7 ...
10336 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10337
10338 // 1 1 1 1 1 1 1 1 ...
10339 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10340
10341 // 1 0 1 0 1 0 1 0 ...
10342 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10343 OddMask = DAG.getSetCC(
10344 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10345 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10346 ISD::CondCode::SETNE);
10347
10348 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10349
10350 // Build up the index vector for interleaving the concatenated vector
10351 // 0 0 1 1 2 2 3 3 ...
10352 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10353 // 0 n 1 n+1 2 n+2 3 n+3 ...
10354 Idx =
10355 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10356
10357 // Then perform the interleave
10358 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10359 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10360 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10361 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10362 }
10363
10364 // Extract the two halves from the interleaved result
10365 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10366 DAG.getVectorIdxConstant(0, DL));
10367 SDValue Hi = DAG.getNode(
10368 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10369 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10370
10371 return DAG.getMergeValues({Lo, Hi}, DL);
10372}
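// Illustrative sketch (not part of the original file): the gather indices
// built for the vrgatherei16.vv fallback above, computed with plain integers.
// Lane i of the index vector is (i >> 1), plus n on odd lanes, so the gather
// reads lanes 0, n, 1, n+1, 2, n+2, ... of the concatenated source. Names are
// assumptions used only for illustration.
static void modelInterleaveIndices(unsigned *Idx, unsigned NumLanes,
                                   unsigned N /* VLMAX of one source */) {
  for (unsigned I = 0; I < NumLanes; ++I)
    Idx[I] = (I >> 1) + (I & 1) * N;  // 0, n, 1, n+1, 2, n+2, ...
}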
10373
10374// Lower step_vector to the vid instruction. Any non-identity step value must
10375 // be accounted for by manual expansion.
10376SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10377 SelectionDAG &DAG) const {
10378 SDLoc DL(Op);
10379 MVT VT = Op.getSimpleValueType();
10380 assert(VT.isScalableVector() && "Expected scalable vector");
10381 MVT XLenVT = Subtarget.getXLenVT();
10382 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10383 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10384 uint64_t StepValImm = Op.getConstantOperandVal(0);
10385 if (StepValImm != 1) {
10386 if (isPowerOf2_64(StepValImm)) {
10387 SDValue StepVal =
10388 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10389 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10390 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10391 } else {
10392 SDValue StepVal = lowerScalarSplat(
10393 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10394 VL, VT, DL, DAG, Subtarget);
10395 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10396 }
10397 }
10398 return StepVec;
10399}
10400
10401// Implement vector_reverse using vrgather.vv with indices determined by
10402// subtracting the id of each element from (VLMAX-1). This will convert
10403// the indices like so:
10404// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10405// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10406SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10407 SelectionDAG &DAG) const {
10408 SDLoc DL(Op);
10409 MVT VecVT = Op.getSimpleValueType();
10410 if (VecVT.getVectorElementType() == MVT::i1) {
10411 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10412 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10413 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10414 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10415 }
10416 unsigned EltSize = VecVT.getScalarSizeInBits();
10417 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10418 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10419 unsigned MaxVLMAX =
10420 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10421
10422 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10423 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10424
10425 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10426 // to use vrgatherei16.vv.
10427 // TODO: It's also possible to use vrgatherei16.vv for other types to
10428 // decrease register width for the index calculation.
10429 if (MaxVLMAX > 256 && EltSize == 8) {
10430 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10431 // Reverse each half, then reassemble them in reverse order.
10432 // NOTE: It's also possible that after splitting that VLMAX no longer
10433 // requires vrgatherei16.vv.
10434 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10435 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10436 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10437 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10438 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10439 // Reassemble the low and high pieces reversed.
10440 // FIXME: This is a CONCAT_VECTORS.
10441 SDValue Res =
10442 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10443 DAG.getVectorIdxConstant(0, DL));
10444 return DAG.getNode(
10445 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10446 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10447 }
10448
10449 // Just promote the int type to i16 which will double the LMUL.
10450 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10451 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10452 }
10453
10454 MVT XLenVT = Subtarget.getXLenVT();
10455 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10456
10457 // Calculate VLMAX-1 for the desired SEW.
10458 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10459 computeVLMax(VecVT, DL, DAG),
10460 DAG.getConstant(1, DL, XLenVT));
10461
10462 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10463 bool IsRV32E64 =
10464 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10465 SDValue SplatVL;
10466 if (!IsRV32E64)
10467 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10468 else
10469 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10470 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10471
10472 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10473 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10474 DAG.getUNDEF(IntVT), Mask, VL);
10475
10476 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10477 DAG.getUNDEF(VecVT), Mask, VL);
10478}
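// Illustrative sketch (not part of the original file): the gather indices
// used for vector_reverse above. vid produces 0, 1, ..., VLMAX-1 and the
// lowering subtracts each id from VLMAX-1, so lane i of the result reads
// source lane VLMAX-1-i. Names are assumptions used only for illustration.
static void modelReverseIndices(unsigned *Idx, unsigned VLMax) {
  for (unsigned I = 0; I < VLMax; ++I)
    Idx[I] = (VLMax - 1) - I;  // (VLMAX-1) - vid
}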
10479
10480SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10481 SelectionDAG &DAG) const {
10482 SDLoc DL(Op);
10483 SDValue V1 = Op.getOperand(0);
10484 SDValue V2 = Op.getOperand(1);
10485 MVT XLenVT = Subtarget.getXLenVT();
10486 MVT VecVT = Op.getSimpleValueType();
10487
10488 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10489
10490 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10491 SDValue DownOffset, UpOffset;
10492 if (ImmValue >= 0) {
10493 // The operand is a TargetConstant; we need to rebuild it as a regular
10494 // constant.
10495 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10496 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10497 } else {
10498 // The operand is a TargetConstant; we need to rebuild it as a regular
10499 // constant rather than negating the original operand.
10500 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10501 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10502 }
10503
10504 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10505
10506 SDValue SlideDown =
10507 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10508 DownOffset, TrueMask, UpOffset);
10509 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10510 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10511 RISCVII::TAIL_AGNOSTIC);
10512}
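// Illustrative sketch (not part of the original file): vector_splice as the
// slidedown/slideup pair above, modelled on plain arrays for a non-negative
// immediate. The first DownOffset elements of V1 are dropped, the surviving
// elements land at the front of the result, and V2 fills the remaining
// UpOffset positions. Names are assumptions used only for illustration.
static void modelSplice(const int *V1, const int *V2, int *Res, unsigned VLMax,
                        unsigned DownOffset) {
  unsigned UpOffset = VLMax - DownOffset;
  for (unsigned I = 0; I < UpOffset; ++I)
    Res[I] = V1[DownOffset + I];   // vslidedown.vx V1 by DownOffset
  for (unsigned I = 0; I < DownOffset; ++I)
    Res[UpOffset + I] = V2[I];     // vslideup.vx V2 by UpOffset
}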
10513
10514SDValue
10515RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10516 SelectionDAG &DAG) const {
10517 SDLoc DL(Op);
10518 auto *Load = cast<LoadSDNode>(Op);
10519
10520 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10521 Load->getMemoryVT(),
10522 *Load->getMemOperand()) &&
10523 "Expecting a correctly-aligned load");
10524
10525 MVT VT = Op.getSimpleValueType();
10526 MVT XLenVT = Subtarget.getXLenVT();
10527 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10528
10529 // If we know the exact VLEN and our fixed length vector completely fills
10530 // the container, use a whole register load instead.
10531 const auto [MinVLMAX, MaxVLMAX] =
10532 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10533 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10534 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10535 MachineMemOperand *MMO = Load->getMemOperand();
10536 SDValue NewLoad =
10537 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10538 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10539 MMO->getAAInfo(), MMO->getRanges());
10540 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10541 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10542 }
10543
10544 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10545
10546 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10547 SDValue IntID = DAG.getTargetConstant(
10548 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10549 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10550 if (!IsMaskOp)
10551 Ops.push_back(DAG.getUNDEF(ContainerVT));
10552 Ops.push_back(Load->getBasePtr());
10553 Ops.push_back(VL);
10554 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10555 SDValue NewLoad =
10556 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10557 Load->getMemoryVT(), Load->getMemOperand());
10558
10559 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10560 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10561}
10562
10563SDValue
10564RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10565 SelectionDAG &DAG) const {
10566 SDLoc DL(Op);
10567 auto *Store = cast<StoreSDNode>(Op);
10568
10569 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10570 Store->getMemoryVT(),
10571 *Store->getMemOperand()) &&
10572 "Expecting a correctly-aligned store");
10573
10574 SDValue StoreVal = Store->getValue();
10575 MVT VT = StoreVal.getSimpleValueType();
10576 MVT XLenVT = Subtarget.getXLenVT();
10577
10578 // If the size is less than a byte, we need to pad with zeros to make a byte.
10579 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10580 VT = MVT::v8i1;
10581 StoreVal =
10582 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10583 StoreVal, DAG.getVectorIdxConstant(0, DL));
10584 }
10585
10586 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10587
10588 SDValue NewValue =
10589 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10590
10591
10592 // If we know the exact VLEN and our fixed length vector completely fills
10593 // the container, use a whole register store instead.
10594 const auto [MinVLMAX, MaxVLMAX] =
10595 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10596 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10597 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10598 MachineMemOperand *MMO = Store->getMemOperand();
10599 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10600 MMO->getPointerInfo(), MMO->getBaseAlign(),
10601 MMO->getFlags(), MMO->getAAInfo());
10602 }
10603
10604 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10605 Subtarget);
10606
10607 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10608 SDValue IntID = DAG.getTargetConstant(
10609 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10610 return DAG.getMemIntrinsicNode(
10611 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10612 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10613 Store->getMemoryVT(), Store->getMemOperand());
10614}
10615
10616SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10617 SelectionDAG &DAG) const {
10618 SDLoc DL(Op);
10619 MVT VT = Op.getSimpleValueType();
10620
10621 const auto *MemSD = cast<MemSDNode>(Op);
10622 EVT MemVT = MemSD->getMemoryVT();
10623 MachineMemOperand *MMO = MemSD->getMemOperand();
10624 SDValue Chain = MemSD->getChain();
10625 SDValue BasePtr = MemSD->getBasePtr();
10626
10627 SDValue Mask, PassThru, VL;
10628 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10629 Mask = VPLoad->getMask();
10630 PassThru = DAG.getUNDEF(VT);
10631 VL = VPLoad->getVectorLength();
10632 } else {
10633 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10634 Mask = MLoad->getMask();
10635 PassThru = MLoad->getPassThru();
10636 }
10637
10638 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10639
10640 MVT XLenVT = Subtarget.getXLenVT();
10641
10642 MVT ContainerVT = VT;
10643 if (VT.isFixedLengthVector()) {
10644 ContainerVT = getContainerForFixedLengthVector(VT);
10645 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10646 if (!IsUnmasked) {
10647 MVT MaskVT = getMaskTypeFor(ContainerVT);
10648 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10649 }
10650 }
10651
10652 if (!VL)
10653 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10654
10655 unsigned IntID =
10656 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10657 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10658 if (IsUnmasked)
10659 Ops.push_back(DAG.getUNDEF(ContainerVT));
10660 else
10661 Ops.push_back(PassThru);
10662 Ops.push_back(BasePtr);
10663 if (!IsUnmasked)
10664 Ops.push_back(Mask);
10665 Ops.push_back(VL);
10666 if (!IsUnmasked)
10667 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10668
10669 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10670
10671 SDValue Result =
10672 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10673 Chain = Result.getValue(1);
10674
10675 if (VT.isFixedLengthVector())
10676 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10677
10678 return DAG.getMergeValues({Result, Chain}, DL);
10679}
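// Illustrative sketch (not part of the original file): the semantics the
// masked-load intrinsic above provides, on plain arrays. Active lanes read
// memory while inactive lanes keep the passthru value (for the VP form the
// passthru is an UNDEF node, so those lanes are unspecified). Names are
// assumptions used only for illustration.
static void modelMaskedLoad(const int *Mem, const bool *Mask,
                            const int *PassThru, int *Result, unsigned VL) {
  for (unsigned I = 0; I < VL; ++I)
    Result[I] = Mask[I] ? Mem[I] : PassThru[I];
}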
10680
10681SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10682 SelectionDAG &DAG) const {
10683 SDLoc DL(Op);
10684
10685 const auto *MemSD = cast<MemSDNode>(Op);
10686 EVT MemVT = MemSD->getMemoryVT();
10687 MachineMemOperand *MMO = MemSD->getMemOperand();
10688 SDValue Chain = MemSD->getChain();
10689 SDValue BasePtr = MemSD->getBasePtr();
10690 SDValue Val, Mask, VL;
10691
10692 bool IsCompressingStore = false;
10693 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10694 Val = VPStore->getValue();
10695 Mask = VPStore->getMask();
10696 VL = VPStore->getVectorLength();
10697 } else {
10698 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10699 Val = MStore->getValue();
10700 Mask = MStore->getMask();
10701 IsCompressingStore = MStore->isCompressingStore();
10702 }
10703
10704 bool IsUnmasked =
10705 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10706
10707 MVT VT = Val.getSimpleValueType();
10708 MVT XLenVT = Subtarget.getXLenVT();
10709
10710 MVT ContainerVT = VT;
10711 if (VT.isFixedLengthVector()) {
10712 ContainerVT = getContainerForFixedLengthVector(VT);
10713
10714 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10715 if (!IsUnmasked || IsCompressingStore) {
10716 MVT MaskVT = getMaskTypeFor(ContainerVT);
10717 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10718 }
10719 }
10720
10721 if (!VL)
10722 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10723
10724 if (IsCompressingStore) {
10725 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10726 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10727 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10728 VL =
10729 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10730 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10731 }
10732
10733 unsigned IntID =
10734 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10735 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10736 Ops.push_back(Val);
10737 Ops.push_back(BasePtr);
10738 if (!IsUnmasked)
10739 Ops.push_back(Mask);
10740 Ops.push_back(VL);
10741
10742 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10743 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10744}
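// Illustrative sketch (not part of the original file): the compressing-store
// path above, modelled on plain arrays. vcompress packs the elements whose
// mask bit is set to the front, the VL is replaced by vcpop(mask), and the
// unmasked store then writes only that packed prefix. Names are assumptions
// used only for illustration.
static unsigned modelCompressingStore(const int *Val, const bool *Mask,
                                      unsigned VL, int *Mem) {
  unsigned OutVL = 0;            // corresponds to the RISCVISD::VCPOP_VL value
  for (unsigned I = 0; I < VL; ++I)
    if (Mask[I])
      Mem[OutVL++] = Val[I];     // vcompress.vm followed by vse with VL=OutVL
  return OutVL;
}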
10745
10746SDValue
10747RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10748 SelectionDAG &DAG) const {
10749 MVT InVT = Op.getOperand(0).getSimpleValueType();
10750 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10751
10752 MVT VT = Op.getSimpleValueType();
10753
10754 SDValue Op1 =
10755 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10756 SDValue Op2 =
10757 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10758
10759 SDLoc DL(Op);
10760 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10761 DAG, Subtarget);
10762 MVT MaskVT = getMaskTypeFor(ContainerVT);
10763
10764 SDValue Cmp =
10765 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10766 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10767
10768 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10769}
10770
10771SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10772 SelectionDAG &DAG) const {
10773 unsigned Opc = Op.getOpcode();
10774 SDLoc DL(Op);
10775 SDValue Chain = Op.getOperand(0);
10776 SDValue Op1 = Op.getOperand(1);
10777 SDValue Op2 = Op.getOperand(2);
10778 SDValue CC = Op.getOperand(3);
10779 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10780 MVT VT = Op.getSimpleValueType();
10781 MVT InVT = Op1.getSimpleValueType();
10782
10783 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10784 // condition code.
10785 if (Opc == ISD::STRICT_FSETCCS) {
10786 // Expand strict_fsetccs(x, oeq) to
10787 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10788 SDVTList VTList = Op->getVTList();
10789 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10790 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10791 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10792 Op2, OLECCVal);
10793 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10794 Op1, OLECCVal);
10795 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10796 Tmp1.getValue(1), Tmp2.getValue(1));
10797 // Tmp1 and Tmp2 might be the same node.
10798 if (Tmp1 != Tmp2)
10799 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10800 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10801 }
10802
10803 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10804 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10805 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10806 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10807 Op2, OEQCCVal);
10808 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10809 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10810 }
10811 }
10812
10813 MVT ContainerInVT = InVT;
10814 if (InVT.isFixedLengthVector()) {
10815 ContainerInVT = getContainerForFixedLengthVector(InVT);
10816 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10817 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10818 }
10819 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10820
10821 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10822
10823 SDValue Res;
10824 if (Opc == ISD::STRICT_FSETCC &&
10825 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10826 CCVal == ISD::SETOLE)) {
10827 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10828 // is only active when both input elements are ordered.
10829 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10830 SDValue OrderMask1 = DAG.getNode(
10831 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10832 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10833 True, VL});
10834 SDValue OrderMask2 = DAG.getNode(
10835 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10836 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10837 True, VL});
10838 Mask =
10839 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10840 // Use Mask as the merge operand to let the result be 0 if either of the
10841 // inputs is unordered.
10842 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10843 DAG.getVTList(MaskVT, MVT::Other),
10844 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10845 } else {
10846 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10847 : RISCVISD::STRICT_FSETCCS_VL;
10848 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10849 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10850 }
10851
10852 if (VT.isFixedLengthVector()) {
10853 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10854 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10855 }
10856 return Res;
10857}
10858
10859// Lower vector ABS to smax(X, sub(0, X)).
10860SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10861 SDLoc DL(Op);
10862 MVT VT = Op.getSimpleValueType();
10863 SDValue X = Op.getOperand(0);
10864
10865 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10866 "Unexpected type for ISD::ABS");
10867
10868 MVT ContainerVT = VT;
10869 if (VT.isFixedLengthVector()) {
10870 ContainerVT = getContainerForFixedLengthVector(VT);
10871 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10872 }
10873
10874 SDValue Mask, VL;
10875 if (Op->getOpcode() == ISD::VP_ABS) {
10876 Mask = Op->getOperand(1);
10877 if (VT.isFixedLengthVector())
10878 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10879 Subtarget);
10880 VL = Op->getOperand(2);
10881 } else
10882 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10883
10884 SDValue SplatZero = DAG.getNode(
10885 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10886 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10887 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10888 DAG.getUNDEF(ContainerVT), Mask, VL);
10889 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10890 DAG.getUNDEF(ContainerVT), Mask, VL);
10891
10892 if (VT.isFixedLengthVector())
10893 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10894 return Max;
10895}
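// Illustrative sketch (not part of the original file): the identity used
// above, abs(x) == smax(x, 0 - x), evaluated on a scalar. The name is an
// assumption used only for illustration.
static int modelAbsViaSmax(int X) {
  int NegX = 0 - X;             // the SUB_VL from a zero splat
  return X > NegX ? X : NegX;   // the SMAX_VL
}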
10896
10897SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10898 SDValue Op, SelectionDAG &DAG) const {
10899 SDLoc DL(Op);
10900 MVT VT = Op.getSimpleValueType();
10901 SDValue Mag = Op.getOperand(0);
10902 SDValue Sign = Op.getOperand(1);
10903 assert(Mag.getValueType() == Sign.getValueType() &&
10904 "Can only handle COPYSIGN with matching types.");
10905
10906 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10907 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10908 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10909
10910 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10911
10912 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10913 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10914
10915 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10916}
10917
10918SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10919 SDValue Op, SelectionDAG &DAG) const {
10920 MVT VT = Op.getSimpleValueType();
10921 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10922
10923 MVT I1ContainerVT =
10924 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10925
10926 SDValue CC =
10927 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10928 SDValue Op1 =
10929 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10930 SDValue Op2 =
10931 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10932
10933 SDLoc DL(Op);
10934 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10935
10936 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10937 Op2, DAG.getUNDEF(ContainerVT), VL);
10938
10939 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10940}
10941
10942SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10943 SelectionDAG &DAG) const {
10944 unsigned NewOpc = getRISCVVLOp(Op);
10945 bool HasMergeOp = hasMergeOp(NewOpc);
10946 bool HasMask = hasMaskOp(NewOpc);
10947
10948 MVT VT = Op.getSimpleValueType();
10949 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10950
10951 // Create list of operands by converting existing ones to scalable types.
10952 SmallVector<SDValue, 6> Ops;
10953 for (const SDValue &V : Op->op_values()) {
10954 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10955
10956 // Pass through non-vector operands.
10957 if (!V.getValueType().isVector()) {
10958 Ops.push_back(V);
10959 continue;
10960 }
10961
10962 // "cast" fixed length vector to a scalable vector.
10963 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10964 "Only fixed length vectors are supported!");
10965 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10966 }
10967
10968 SDLoc DL(Op);
10969 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10970 if (HasMergeOp)
10971 Ops.push_back(DAG.getUNDEF(ContainerVT));
10972 if (HasMask)
10973 Ops.push_back(Mask);
10974 Ops.push_back(VL);
10975
10976 // StrictFP operations have two result values. Their lowered result should
10977 // have the same result count.
10978 if (Op->isStrictFPOpcode()) {
10979 SDValue ScalableRes =
10980 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10981 Op->getFlags());
10982 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10983 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10984 }
10985
10986 SDValue ScalableRes =
10987 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10988 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10989}
10990
10991// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10992// * Operands of each node are assumed to be in the same order.
10993// * The EVL operand is promoted from i32 to i64 on RV64.
10994// * Fixed-length vectors are converted to their scalable-vector container
10995// types.
10996SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10997 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10998 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10999
11000 SDLoc DL(Op);
11001 MVT VT = Op.getSimpleValueType();
11002 SmallVector<SDValue, 4> Ops;
11003
11004 MVT ContainerVT = VT;
11005 if (VT.isFixedLengthVector())
11006 ContainerVT = getContainerForFixedLengthVector(VT);
11007
11008 for (const auto &OpIdx : enumerate(Op->ops())) {
11009 SDValue V = OpIdx.value();
11010 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11011 // Add dummy merge value before the mask. Or if there isn't a mask, before
11012 // EVL.
11013 if (HasMergeOp) {
11014 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11015 if (MaskIdx) {
11016 if (*MaskIdx == OpIdx.index())
11017 Ops.push_back(DAG.getUNDEF(ContainerVT));
11018 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11019 OpIdx.index()) {
11020 if (Op.getOpcode() == ISD::VP_MERGE) {
11021 // For VP_MERGE, copy the false operand instead of an undef value.
11022 Ops.push_back(Ops.back());
11023 } else {
11024 assert(Op.getOpcode() == ISD::VP_SELECT);
11025 // For VP_SELECT, add an undef value.
11026 Ops.push_back(DAG.getUNDEF(ContainerVT));
11027 }
11028 }
11029 }
11030 // Pass through operands which aren't fixed-length vectors.
11031 if (!V.getValueType().isFixedLengthVector()) {
11032 Ops.push_back(V);
11033 continue;
11034 }
11035 // "cast" fixed length vector to a scalable vector.
11036 MVT OpVT = V.getSimpleValueType();
11037 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11038 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11039 "Only fixed length vectors are supported!");
11040 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11041 }
11042
11043 if (!VT.isFixedLengthVector())
11044 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11045
11046 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11047
11048 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11049}
11050
11051SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11052 SelectionDAG &DAG) const {
11053 SDLoc DL(Op);
11054 MVT VT = Op.getSimpleValueType();
11055
11056 SDValue Src = Op.getOperand(0);
11057 // NOTE: Mask is dropped.
11058 SDValue VL = Op.getOperand(2);
11059
11060 MVT ContainerVT = VT;
11061 if (VT.isFixedLengthVector()) {
11062 ContainerVT = getContainerForFixedLengthVector(VT);
11063 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11064 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11065 }
11066
11067 MVT XLenVT = Subtarget.getXLenVT();
11068 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11069 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11070 DAG.getUNDEF(ContainerVT), Zero, VL);
11071
11072 SDValue SplatValue = DAG.getConstant(
11073 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11074 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11075 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11076
11077 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11078 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11079 if (!VT.isFixedLengthVector())
11080 return Result;
11081 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11082}
11083
11084SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11085 SelectionDAG &DAG) const {
11086 SDLoc DL(Op);
11087 MVT VT = Op.getSimpleValueType();
11088
11089 SDValue Op1 = Op.getOperand(0);
11090 SDValue Op2 = Op.getOperand(1);
11091 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11092 // NOTE: Mask is dropped.
11093 SDValue VL = Op.getOperand(4);
11094
11095 MVT ContainerVT = VT;
11096 if (VT.isFixedLengthVector()) {
11097 ContainerVT = getContainerForFixedLengthVector(VT);
11098 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11099 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11100 }
11101
11102  SDValue Result;
11103 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11104
11105 switch (Condition) {
11106 default:
11107 break;
11108 // X != Y --> (X^Y)
11109 case ISD::SETNE:
11110 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11111 break;
11112 // X == Y --> ~(X^Y)
11113 case ISD::SETEQ: {
11114 SDValue Temp =
11115 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11116 Result =
11117 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11118 break;
11119 }
11120 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11121 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11122 case ISD::SETGT:
11123 case ISD::SETULT: {
11124 SDValue Temp =
11125 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11126 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11127 break;
11128 }
11129 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11130 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11131 case ISD::SETLT:
11132 case ISD::SETUGT: {
11133 SDValue Temp =
11134 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11135 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11136 break;
11137 }
11138 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11139 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11140 case ISD::SETGE:
11141 case ISD::SETULE: {
11142 SDValue Temp =
11143 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11144 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11145 break;
11146 }
11147 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11148 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11149 case ISD::SETLE:
11150 case ISD::SETUGE: {
11151 SDValue Temp =
11152 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11153 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11154 break;
11155 }
11156 }
11157
11158 if (!VT.isFixedLengthVector())
11159 return Result;
11160 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11161}
11162
11163// Lower Floating-Point/Integer Type-Convert VP SDNodes
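// For example (illustrative): a vp.sitofp from <N x i8> to <N x f64> first
// sign-extends the source to <N x i32> and then converts, while a vp.fptoui
// from <N x f64> to <N x i8> converts to <N x i32> and then truncates the
// result in halves until it reaches i8.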
11164SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11165 SelectionDAG &DAG) const {
11166 SDLoc DL(Op);
11167
11168 SDValue Src = Op.getOperand(0);
11169 SDValue Mask = Op.getOperand(1);
11170 SDValue VL = Op.getOperand(2);
11171 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11172
11173 MVT DstVT = Op.getSimpleValueType();
11174 MVT SrcVT = Src.getSimpleValueType();
11175 if (DstVT.isFixedLengthVector()) {
11176 DstVT = getContainerForFixedLengthVector(DstVT);
11177 SrcVT = getContainerForFixedLengthVector(SrcVT);
11178 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11179 MVT MaskVT = getMaskTypeFor(DstVT);
11180 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11181 }
11182
11183 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11184 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11185
11186  SDValue Result;
11187 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11188 if (SrcVT.isInteger()) {
11189 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11190
11191 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11192                                    ? RISCVISD::VSEXT_VL
11193                                    : RISCVISD::VZEXT_VL;
11194
11195 // Do we need to do any pre-widening before converting?
11196 if (SrcEltSize == 1) {
11197 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11198 MVT XLenVT = Subtarget.getXLenVT();
11199 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11200 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11201 DAG.getUNDEF(IntVT), Zero, VL);
11202 SDValue One = DAG.getConstant(
11203 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11204 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11205 DAG.getUNDEF(IntVT), One, VL);
11206 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11207 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11208 } else if (DstEltSize > (2 * SrcEltSize)) {
11209 // Widen before converting.
11210 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11211 DstVT.getVectorElementCount());
11212 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11213 }
11214
11215 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11216 } else {
11217 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11218 "Wrong input/output vector types");
11219
11220 // Convert f16 to f32 then convert f32 to i64.
11221 if (DstEltSize > (2 * SrcEltSize)) {
11222 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11223 MVT InterimFVT =
11224 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11225 Src =
11226 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11227 }
11228
11229 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11230 }
11231 } else { // Narrowing + Conversion
11232 if (SrcVT.isInteger()) {
11233 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11234      // First do a narrowing conversion to an FP type half the size, then
11235      // round to a smaller FP type if needed.
11236
11237 MVT InterimFVT = DstVT;
11238 if (SrcEltSize > (2 * DstEltSize)) {
11239 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11240 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11241 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11242 }
11243
11244 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11245
11246 if (InterimFVT != DstVT) {
11247 Src = Result;
11248 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11249 }
11250 } else {
11251 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11252 "Wrong input/output vector types");
11253 // First do a narrowing conversion to an integer half the size, then
11254 // truncate if needed.
11255
11256 if (DstEltSize == 1) {
11257 // First convert to the same size integer, then convert to mask using
11258 // setcc.
11259 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11260 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11261 DstVT.getVectorElementCount());
11262 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11263
11264 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11265 // otherwise the conversion was undefined.
11266 MVT XLenVT = Subtarget.getXLenVT();
11267 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11268 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11269 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11270 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11271 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11272 DAG.getUNDEF(DstVT), Mask, VL});
11273 } else {
11274 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11275 DstVT.getVectorElementCount());
11276
11277 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11278
11279 while (InterimIVT != DstVT) {
11280 SrcEltSize /= 2;
11281 Src = Result;
11282 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11283 DstVT.getVectorElementCount());
11284 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11285 Src, Mask, VL);
11286 }
11287 }
11288 }
11289 }
11290
11291 MVT VT = Op.getSimpleValueType();
11292 if (!VT.isFixedLengthVector())
11293 return Result;
11294 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11295}
11296
11297SDValue
11298RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11299 SelectionDAG &DAG) const {
11300 SDLoc DL(Op);
11301
11302 SDValue Op1 = Op.getOperand(0);
11303 SDValue Op2 = Op.getOperand(1);
11304 SDValue Offset = Op.getOperand(2);
11305 SDValue Mask = Op.getOperand(3);
11306 SDValue EVL1 = Op.getOperand(4);
11307 SDValue EVL2 = Op.getOperand(5);
11308
11309 const MVT XLenVT = Subtarget.getXLenVT();
11310 MVT VT = Op.getSimpleValueType();
11311 MVT ContainerVT = VT;
11312 if (VT.isFixedLengthVector()) {
11313 ContainerVT = getContainerForFixedLengthVector(VT);
11314 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11315 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11316 MVT MaskVT = getMaskTypeFor(ContainerVT);
11317 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11318 }
11319
11320 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11321 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11322
11323 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11324 if (IsMaskVector) {
11325 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11326
11327 // Expand input operands
11328 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11329 DAG.getUNDEF(ContainerVT),
11330 DAG.getConstant(1, DL, XLenVT), EVL1);
11331 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11332 DAG.getUNDEF(ContainerVT),
11333 DAG.getConstant(0, DL, XLenVT), EVL1);
11334 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11335 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11336
11337 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11338 DAG.getUNDEF(ContainerVT),
11339 DAG.getConstant(1, DL, XLenVT), EVL2);
11340 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11341 DAG.getUNDEF(ContainerVT),
11342 DAG.getConstant(0, DL, XLenVT), EVL2);
11343 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11344 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11345 }
11346
11347 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11348 SDValue DownOffset, UpOffset;
11349 if (ImmValue >= 0) {
11350    // The operand is a TargetConstant, so we need to rebuild it as a regular
11351    // constant.
11352 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11353 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11354 } else {
11355    // The operand is a TargetConstant, so we need to rebuild it as a regular
11356    // constant rather than negating the original operand.
11357 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11358 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11359 }
11360
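  // For example, with ImmValue = 2 and EVL1 = 8: DownOffset = 2 and
  // UpOffset = 6, so the result is Op1[2..7] followed by the leading elements
  // of Op2.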
11361 SDValue SlideDown =
11362 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11363 Op1, DownOffset, Mask, UpOffset);
11364 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11365 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11366
11367 if (IsMaskVector) {
11368 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11369 Result = DAG.getNode(
11370 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11371 {Result, DAG.getConstant(0, DL, ContainerVT),
11372 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11373 Mask, EVL2});
11374 }
11375
11376 if (!VT.isFixedLengthVector())
11377 return Result;
11378 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11379}
11380
11381SDValue
11382RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11383 SelectionDAG &DAG) const {
11384 SDLoc DL(Op);
11385 MVT VT = Op.getSimpleValueType();
11386 MVT XLenVT = Subtarget.getXLenVT();
11387
11388 SDValue Op1 = Op.getOperand(0);
11389 SDValue Mask = Op.getOperand(1);
11390 SDValue EVL = Op.getOperand(2);
11391
11392 MVT ContainerVT = VT;
11393 if (VT.isFixedLengthVector()) {
11394 ContainerVT = getContainerForFixedLengthVector(VT);
11395 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11396 MVT MaskVT = getMaskTypeFor(ContainerVT);
11397 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11398 }
11399
11400 MVT GatherVT = ContainerVT;
11401 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11402 // Check if we are working with mask vectors
11403 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11404 if (IsMaskVector) {
11405 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11406
11407 // Expand input operand
11408 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11409 DAG.getUNDEF(IndicesVT),
11410 DAG.getConstant(1, DL, XLenVT), EVL);
11411 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11412 DAG.getUNDEF(IndicesVT),
11413 DAG.getConstant(0, DL, XLenVT), EVL);
11414 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11415 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11416 }
11417
11418 unsigned EltSize = GatherVT.getScalarSizeInBits();
11419 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11420 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11421 unsigned MaxVLMAX =
11422 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11423
11424 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11425 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11426 // to use vrgatherei16.vv.
11427 // TODO: It's also possible to use vrgatherei16.vv for other types to
11428 // decrease register width for the index calculation.
11429 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11430 if (MaxVLMAX > 256 && EltSize == 8) {
11431 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11432 // Split the vector in half and reverse each half using a full register
11433 // reverse.
11434 // Swap the halves and concatenate them.
11435 // Slide the concatenated result by (VLMax - VL).
11436 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11437 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11438 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11439
11440 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11441 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11442
11443 // Reassemble the low and high pieces reversed.
11444 // NOTE: this Result is unmasked (because we do not need masks for
11445 // shuffles). If in the future this has to change, we can use a SELECT_VL
11446 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11447 SDValue Result =
11448 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11449
11450 // Slide off any elements from past EVL that were reversed into the low
11451 // elements.
11452 unsigned MinElts = GatherVT.getVectorMinNumElements();
11453 SDValue VLMax =
11454 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11455 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11456
11457 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11458 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11459
11460 if (IsMaskVector) {
11461 // Truncate Result back to a mask vector
11462 Result =
11463 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11464 {Result, DAG.getConstant(0, DL, GatherVT),
11465                            DAG.getCondCode(ISD::SETNE),
11466                            DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11467 }
11468
11469 if (!VT.isFixedLengthVector())
11470 return Result;
11471 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11472 }
11473
11474 // Just promote the int type to i16 which will double the LMUL.
11475 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11476 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11477 }
11478
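  // General case: reverse with a gather whose indices are (EVL-1) - vid, so
  // element i of the result reads element (EVL-1) - i of the source (e.g.
  // EVL = 4 gives indices 3, 2, 1, 0).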
11479 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11480 SDValue VecLen =
11481 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11482 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11483 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11484 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11485 DAG.getUNDEF(IndicesVT), Mask, EVL);
11486 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11487 DAG.getUNDEF(GatherVT), Mask, EVL);
11488
11489 if (IsMaskVector) {
11490 // Truncate Result back to a mask vector
11491 Result = DAG.getNode(
11492 RISCVISD::SETCC_VL, DL, ContainerVT,
11493 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11494 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11495 }
11496
11497 if (!VT.isFixedLengthVector())
11498 return Result;
11499 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11500}
11501
11502SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11503 SelectionDAG &DAG) const {
11504 MVT VT = Op.getSimpleValueType();
11505 if (VT.getVectorElementType() != MVT::i1)
11506 return lowerVPOp(Op, DAG);
11507
11508  // It is safe to drop the mask parameter as masked-off elements are undef.
11509 SDValue Op1 = Op->getOperand(0);
11510 SDValue Op2 = Op->getOperand(1);
11511 SDValue VL = Op->getOperand(3);
11512
11513 MVT ContainerVT = VT;
11514 const bool IsFixed = VT.isFixedLengthVector();
11515 if (IsFixed) {
11516 ContainerVT = getContainerForFixedLengthVector(VT);
11517 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11518 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11519 }
11520
11521 SDLoc DL(Op);
11522 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11523 if (!IsFixed)
11524 return Val;
11525 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11526}
11527
11528SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11529 SelectionDAG &DAG) const {
11530 SDLoc DL(Op);
11531 MVT XLenVT = Subtarget.getXLenVT();
11532 MVT VT = Op.getSimpleValueType();
11533 MVT ContainerVT = VT;
11534 if (VT.isFixedLengthVector())
11535 ContainerVT = getContainerForFixedLengthVector(VT);
11536
11537 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11538
11539 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11540 // Check if the mask is known to be all ones
11541 SDValue Mask = VPNode->getMask();
11542 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11543
11544 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11545 : Intrinsic::riscv_vlse_mask,
11546 DL, XLenVT);
11547 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11548 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11549 VPNode->getStride()};
11550 if (!IsUnmasked) {
11551 if (VT.isFixedLengthVector()) {
11552 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11553 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11554 }
11555 Ops.push_back(Mask);
11556 }
11557 Ops.push_back(VPNode->getVectorLength());
11558 if (!IsUnmasked) {
11559 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11560 Ops.push_back(Policy);
11561 }
11562
11563 SDValue Result =
11564      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11565                              VPNode->getMemoryVT(), VPNode->getMemOperand());
11566 SDValue Chain = Result.getValue(1);
11567
11568 if (VT.isFixedLengthVector())
11569 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11570
11571 return DAG.getMergeValues({Result, Chain}, DL);
11572}
11573
11574SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 SDLoc DL(Op);
11577 MVT XLenVT = Subtarget.getXLenVT();
11578
11579 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11580 SDValue StoreVal = VPNode->getValue();
11581 MVT VT = StoreVal.getSimpleValueType();
11582 MVT ContainerVT = VT;
11583 if (VT.isFixedLengthVector()) {
11584 ContainerVT = getContainerForFixedLengthVector(VT);
11585 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11586 }
11587
11588 // Check if the mask is known to be all ones
11589 SDValue Mask = VPNode->getMask();
11590 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11591
11592 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11593 : Intrinsic::riscv_vsse_mask,
11594 DL, XLenVT);
11595 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11596 VPNode->getBasePtr(), VPNode->getStride()};
11597 if (!IsUnmasked) {
11598 if (VT.isFixedLengthVector()) {
11599 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11600 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11601 }
11602 Ops.push_back(Mask);
11603 }
11604 Ops.push_back(VPNode->getVectorLength());
11605
11606 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11607 Ops, VPNode->getMemoryVT(),
11608 VPNode->getMemOperand());
11609}
11610
11611// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed load. The RVV indexed load instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
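// For example (illustrative), to gather i32 elements the index vector should
// already hold byte offsets (0, 4, 8, ... for consecutive elements); the code
// below then emits riscv_vluxei or riscv_vluxei_mask with those indices.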
11617SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 MVT VT = Op.getSimpleValueType();
11621
11622 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11623 EVT MemVT = MemSD->getMemoryVT();
11624 MachineMemOperand *MMO = MemSD->getMemOperand();
11625 SDValue Chain = MemSD->getChain();
11626 SDValue BasePtr = MemSD->getBasePtr();
11627
11628 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11629 SDValue Index, Mask, PassThru, VL;
11630
11631 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11632 Index = VPGN->getIndex();
11633 Mask = VPGN->getMask();
11634 PassThru = DAG.getUNDEF(VT);
11635 VL = VPGN->getVectorLength();
11636 // VP doesn't support extending loads.
11637    LoadExtType = ISD::NON_EXTLOAD;
11638  } else {
11639 // Else it must be a MGATHER.
11640 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11641 Index = MGN->getIndex();
11642 Mask = MGN->getMask();
11643 PassThru = MGN->getPassThru();
11644 LoadExtType = MGN->getExtensionType();
11645 }
11646
11647 MVT IndexVT = Index.getSimpleValueType();
11648 MVT XLenVT = Subtarget.getXLenVT();
11649
11650  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11651         "Unexpected VTs!");
11652 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11653  // Targets have to explicitly opt in to extending vector loads.
11654 assert(LoadExtType == ISD::NON_EXTLOAD &&
11655 "Unexpected extending MGATHER/VP_GATHER");
11656
11657 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11658 // the selection of the masked intrinsics doesn't do this for us.
11659 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11660
11661 MVT ContainerVT = VT;
11662 if (VT.isFixedLengthVector()) {
11663 ContainerVT = getContainerForFixedLengthVector(VT);
11664 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11665 ContainerVT.getVectorElementCount());
11666
11667 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(ContainerVT);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11673 }
11674 }
11675
11676 if (!VL)
11677 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11678
11679 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11680 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11681 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11682 }
11683
11684 unsigned IntID =
11685 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11686 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11687 if (IsUnmasked)
11688 Ops.push_back(DAG.getUNDEF(ContainerVT));
11689 else
11690 Ops.push_back(PassThru);
11691 Ops.push_back(BasePtr);
11692 Ops.push_back(Index);
11693 if (!IsUnmasked)
11694 Ops.push_back(Mask);
11695 Ops.push_back(VL);
11696 if (!IsUnmasked)
11697    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11698
11699 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11700 SDValue Result =
11701 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11702 Chain = Result.getValue(1);
11703
11704 if (VT.isFixedLengthVector())
11705 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11706
11707 return DAG.getMergeValues({Result, Chain}, DL);
11708}
11709
11710// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11711// matched to a RVV indexed store. The RVV indexed store instructions only
11712// support the "unsigned unscaled" addressing mode; indices are implicitly
11713// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11714// signed or scaled indexing is extended to the XLEN value type and scaled
11715// accordingly.
11716SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11717 SelectionDAG &DAG) const {
11718 SDLoc DL(Op);
11719 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11720 EVT MemVT = MemSD->getMemoryVT();
11721 MachineMemOperand *MMO = MemSD->getMemOperand();
11722 SDValue Chain = MemSD->getChain();
11723 SDValue BasePtr = MemSD->getBasePtr();
11724
11725 [[maybe_unused]] bool IsTruncatingStore = false;
11726 SDValue Index, Mask, Val, VL;
11727
11728 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11729 Index = VPSN->getIndex();
11730 Mask = VPSN->getMask();
11731 Val = VPSN->getValue();
11732 VL = VPSN->getVectorLength();
11733 // VP doesn't support truncating stores.
11734 IsTruncatingStore = false;
11735 } else {
11736 // Else it must be a MSCATTER.
11737 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11738 Index = MSN->getIndex();
11739 Mask = MSN->getMask();
11740 Val = MSN->getValue();
11741 IsTruncatingStore = MSN->isTruncatingStore();
11742 }
11743
11744 MVT VT = Val.getSimpleValueType();
11745 MVT IndexVT = Index.getSimpleValueType();
11746 MVT XLenVT = Subtarget.getXLenVT();
11747
11748  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11749         "Unexpected VTs!");
11750 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11751  // Targets have to explicitly opt in to extending vector loads and
11752  // truncating vector stores.
11753 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11754
11755 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11756 // the selection of the masked intrinsics doesn't do this for us.
11757 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11758
11759 MVT ContainerVT = VT;
11760 if (VT.isFixedLengthVector()) {
11761 ContainerVT = getContainerForFixedLengthVector(VT);
11762 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11763 ContainerVT.getVectorElementCount());
11764
11765 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11766 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11767
11768 if (!IsUnmasked) {
11769 MVT MaskVT = getMaskTypeFor(ContainerVT);
11770 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11771 }
11772 }
11773
11774 if (!VL)
11775 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11776
11777 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11778 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11779 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11780 }
11781
11782 unsigned IntID =
11783 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11784 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11785 Ops.push_back(Val);
11786 Ops.push_back(BasePtr);
11787 Ops.push_back(Index);
11788 if (!IsUnmasked)
11789 Ops.push_back(Mask);
11790 Ops.push_back(VL);
11791
11792  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11793                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11794}
11795
11796SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11797 SelectionDAG &DAG) const {
11798 const MVT XLenVT = Subtarget.getXLenVT();
11799 SDLoc DL(Op);
11800 SDValue Chain = Op->getOperand(0);
11801 SDValue SysRegNo = DAG.getTargetConstant(
11802 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11803 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11804 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11805
11806 // Encoding used for rounding mode in RISC-V differs from that used in
11807  // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
11808  // into a table, which consists of a sequence of 4-bit fields, each
11809  // representing the corresponding FLT_ROUNDS mode.
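  // For example, an frm value of 1 (RTZ) shifts the table right by 4 and masks
  // off 4 bits, yielding the FLT_ROUNDS value 0 (toward zero).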
11810 static const int Table =
11811      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11812      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11813      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11814      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11815      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11816
11817 SDValue Shift =
11818 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11819 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11820 DAG.getConstant(Table, DL, XLenVT), Shift);
11821 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11822 DAG.getConstant(7, DL, XLenVT));
11823
11824 return DAG.getMergeValues({Masked, Chain}, DL);
11825}
11826
11827SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11828 SelectionDAG &DAG) const {
11829 const MVT XLenVT = Subtarget.getXLenVT();
11830 SDLoc DL(Op);
11831 SDValue Chain = Op->getOperand(0);
11832 SDValue RMValue = Op->getOperand(1);
11833 SDValue SysRegNo = DAG.getTargetConstant(
11834 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11835
11836 // Encoding used for rounding mode in RISC-V differs from that used in
11837  // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into a
11838  // table, which consists of a sequence of 4-bit fields, each representing the
11839  // corresponding RISC-V mode.
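  // For example, a FLT_ROUNDS value of 1 (to nearest) selects the second 4-bit
  // field of the table, yielding the RISC-V frm encoding 0 (RNE), which is
  // then written to the frm register.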
11840 static const unsigned Table =
11841      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11842      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11843      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11844      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11845      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11846
11847 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11848
11849 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11850 DAG.getConstant(2, DL, XLenVT));
11851 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11852 DAG.getConstant(Table, DL, XLenVT), Shift);
11853 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11854 DAG.getConstant(0x7, DL, XLenVT));
11855 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11856 RMValue);
11857}
11858
11859SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11860 SelectionDAG &DAG) const {
11861  MachineFunction &MF = DAG.getMachineFunction();
11862
11863 bool isRISCV64 = Subtarget.is64Bit();
11864 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11865
11866 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11867 return DAG.getFrameIndex(FI, PtrVT);
11868}
11869
11870// Returns the opcode of the target-specific SDNode that implements the 32-bit
11871// form of the given Opcode.
11872static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11873 switch (Opcode) {
11874 default:
11875 llvm_unreachable("Unexpected opcode");
11876 case ISD::SHL:
11877 return RISCVISD::SLLW;
11878 case ISD::SRA:
11879 return RISCVISD::SRAW;
11880 case ISD::SRL:
11881 return RISCVISD::SRLW;
11882 case ISD::SDIV:
11883 return RISCVISD::DIVW;
11884 case ISD::UDIV:
11885 return RISCVISD::DIVUW;
11886 case ISD::UREM:
11887 return RISCVISD::REMUW;
11888 case ISD::ROTL:
11889 return RISCVISD::ROLW;
11890 case ISD::ROTR:
11891 return RISCVISD::RORW;
11892 }
11893}
11894
11895// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11896// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11897// otherwise be promoted to i64, making it difficult to select the
11898 // SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
11899 // was originally of type i8/i16/i32 is lost.
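// For example, (i32 (srl X, Y)) on RV64 becomes
// (i32 (trunc (riscv_srlw (any_extend X), (any_extend Y)))).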
11900 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11901                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
11902 SDLoc DL(N);
11903 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11904 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11905 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11906 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11907 // ReplaceNodeResults requires we maintain the same type for the return value.
11908 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11909}
11910
11911// Converts the given 32-bit operation to a i64 operation with signed extension
11912// semantic to reduce the signed extension instructions.
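// For example, (i32 (add X, Y)) becomes
// (i32 (trunc (sext_inreg (add (any_extend X), (any_extend Y)), i32))), which
// can later be selected to ADDW.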
11913 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11914  SDLoc DL(N);
11915 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11916 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11917 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11918 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11919 DAG.getValueType(MVT::i32));
11920 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11921}
11922
11923 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11924                                              SmallVectorImpl<SDValue> &Results,
11925                                              SelectionDAG &DAG) const {
11926 SDLoc DL(N);
11927 switch (N->getOpcode()) {
11928 default:
11929 llvm_unreachable("Don't know how to custom type legalize this operation!");
11930  case ISD::STRICT_FP_TO_SINT:
11931  case ISD::STRICT_FP_TO_UINT:
11932  case ISD::FP_TO_SINT:
11933 case ISD::FP_TO_UINT: {
11934 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11935 "Unexpected custom legalisation");
11936 bool IsStrict = N->isStrictFPOpcode();
11937 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11938 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11939 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11940 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11941        TargetLowering::TypeSoftenFloat) {
11942      if (!isTypeLegal(Op0.getValueType()))
11943 return;
11944 if (IsStrict) {
11945 SDValue Chain = N->getOperand(0);
11946        // In the absence of Zfh, promote f16 to f32, then convert.
11947 if (Op0.getValueType() == MVT::f16 &&
11948 !Subtarget.hasStdExtZfhOrZhinx()) {
11949 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11950 {Chain, Op0});
11951 Chain = Op0.getValue(1);
11952 }
11953 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11954                                : RISCVISD::STRICT_FCVT_WU_RV64;
11955      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11956 SDValue Res = DAG.getNode(
11957 Opc, DL, VTs, Chain, Op0,
11958 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11959 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11960 Results.push_back(Res.getValue(1));
11961 return;
11962 }
11963    // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11964 // convert.
11965 if ((Op0.getValueType() == MVT::f16 &&
11966 !Subtarget.hasStdExtZfhOrZhinx()) ||
11967 Op0.getValueType() == MVT::bf16)
11968 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11969
11970 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11971 SDValue Res =
11972 DAG.getNode(Opc, DL, MVT::i64, Op0,
11973 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11974 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11975 return;
11976 }
11977 // If the FP type needs to be softened, emit a library call using the 'si'
11978 // version. If we left it to default legalization we'd end up with 'di'. If
11979    // the FP type doesn't need to be softened, just let generic type
11980 // legalization promote the result type.
11981 RTLIB::Libcall LC;
11982 if (IsSigned)
11983 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11984 else
11985 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11986 MakeLibCallOptions CallOptions;
11987 EVT OpVT = Op0.getValueType();
11988 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11989 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11990 SDValue Result;
11991 std::tie(Result, Chain) =
11992 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11993 Results.push_back(Result);
11994 if (IsStrict)
11995 Results.push_back(Chain);
11996 break;
11997 }
11998 case ISD::LROUND: {
11999 SDValue Op0 = N->getOperand(0);
12000 EVT Op0VT = Op0.getValueType();
12001 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12002        TargetLowering::TypeSoftenFloat) {
12003      if (!isTypeLegal(Op0VT))
12004 return;
12005
12006    // In the absence of Zfh, promote f16 to f32, then convert.
12007 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12008 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12009
12010 SDValue Res =
12011 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12012 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12013 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12014 return;
12015 }
12016 // If the FP type needs to be softened, emit a library call to lround. We'll
12017 // need to truncate the result. We assume any value that doesn't fit in i32
12018 // is allowed to return an unspecified value.
12019 RTLIB::Libcall LC =
12020 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12021 MakeLibCallOptions CallOptions;
12022 EVT OpVT = Op0.getValueType();
12023 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12024 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12025 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12026 Results.push_back(Result);
12027 break;
12028 }
12029  case ISD::READCYCLECOUNTER:
12030  case ISD::READSTEADYCOUNTER: {
12031    assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12032 "has custom type legalization on riscv32");
12033
12034 SDValue LoCounter, HiCounter;
12035 MVT XLenVT = Subtarget.getXLenVT();
12036 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12037 LoCounter = DAG.getTargetConstant(
12038 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12039 HiCounter = DAG.getTargetConstant(
12040 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12041 } else {
12042 LoCounter = DAG.getTargetConstant(
12043 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12044 HiCounter = DAG.getTargetConstant(
12045 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12046 }
12047 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12048    SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12049                              N->getOperand(0), LoCounter, HiCounter);
12050
12051 Results.push_back(
12052 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12053 Results.push_back(RCW.getValue(2));
12054 break;
12055 }
12056 case ISD::LOAD: {
12057 if (!ISD::isNON_EXTLoad(N))
12058 return;
12059
12060 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12061 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12062 LoadSDNode *Ld = cast<LoadSDNode>(N);
12063
12064 SDLoc dl(N);
12065 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12066 Ld->getBasePtr(), Ld->getMemoryVT(),
12067 Ld->getMemOperand());
12068 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12069 Results.push_back(Res.getValue(1));
12070 return;
12071 }
12072 case ISD::MUL: {
12073 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12074 unsigned XLen = Subtarget.getXLen();
12075 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
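    // For example, an i64 multiply on RV32 where one operand has its upper
    // 32 bits known zero and the other is sign-extended from 32 bits can be
    // lowered to a MUL for the low half and a MULHSU for the high half.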
12076 if (Size > XLen) {
12077 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12078 SDValue LHS = N->getOperand(0);
12079 SDValue RHS = N->getOperand(1);
12080 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12081
12082 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12083 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12084 // We need exactly one side to be unsigned.
12085 if (LHSIsU == RHSIsU)
12086 return;
12087
12088 auto MakeMULPair = [&](SDValue S, SDValue U) {
12089 MVT XLenVT = Subtarget.getXLenVT();
12090 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12091 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12092 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12093 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12094 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12095 };
12096
12097 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12098 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12099
12100 // The other operand should be signed, but still prefer MULH when
12101 // possible.
12102 if (RHSIsU && LHSIsS && !RHSIsS)
12103 Results.push_back(MakeMULPair(LHS, RHS));
12104 else if (LHSIsU && RHSIsS && !LHSIsS)
12105 Results.push_back(MakeMULPair(RHS, LHS));
12106
12107 return;
12108 }
12109 [[fallthrough]];
12110 }
12111 case ISD::ADD:
12112 case ISD::SUB:
12113 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12114 "Unexpected custom legalisation");
12115 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12116 break;
12117 case ISD::SHL:
12118 case ISD::SRA:
12119 case ISD::SRL:
12120 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12121 "Unexpected custom legalisation");
12122 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12123 // If we can use a BSET instruction, allow default promotion to apply.
12124 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12125 isOneConstant(N->getOperand(0)))
12126 break;
12127 Results.push_back(customLegalizeToWOp(N, DAG));
12128 break;
12129 }
12130
12131 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12132 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12133 // shift amount.
12134 if (N->getOpcode() == ISD::SHL) {
12135 SDLoc DL(N);
12136 SDValue NewOp0 =
12137 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12138 SDValue NewOp1 =
12139 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12140 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12141 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12142 DAG.getValueType(MVT::i32));
12143 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12144 }
12145
12146 break;
12147 case ISD::ROTL:
12148 case ISD::ROTR:
12149 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12150 "Unexpected custom legalisation");
12151 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12152 Subtarget.hasVendorXTHeadBb()) &&
12153 "Unexpected custom legalization");
12154 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12155 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12156 return;
12157 Results.push_back(customLegalizeToWOp(N, DAG));
12158 break;
12159 case ISD::CTTZ:
12160  case ISD::CTTZ_ZERO_UNDEF:
12161  case ISD::CTLZ:
12162 case ISD::CTLZ_ZERO_UNDEF: {
12163 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12164 "Unexpected custom legalisation");
12165
12166 SDValue NewOp0 =
12167 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12168 bool IsCTZ =
12169 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12170 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12171 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12172 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12173 return;
12174 }
12175 case ISD::SDIV:
12176 case ISD::UDIV:
12177 case ISD::UREM: {
12178 MVT VT = N->getSimpleValueType(0);
12179 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12180 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12181 "Unexpected custom legalisation");
12182 // Don't promote division/remainder by constant since we should expand those
12183 // to multiply by magic constant.
12184    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12185    if (N->getOperand(1).getOpcode() == ISD::Constant &&
12186 !isIntDivCheap(N->getValueType(0), Attr))
12187 return;
12188
12189 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12190 // the upper 32 bits. For other types we need to sign or zero extend
12191 // based on the opcode.
12192 unsigned ExtOpc = ISD::ANY_EXTEND;
12193 if (VT != MVT::i32)
12194 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12195                                           : ISD::ZERO_EXTEND;
12196
12197 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12198 break;
12199 }
12200 case ISD::SADDO: {
12201 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12202 "Unexpected custom legalisation");
12203
12204 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12205 // use the default legalization.
12206 if (!isa<ConstantSDNode>(N->getOperand(1)))
12207 return;
12208
12209 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12210 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12211 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12212 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12213 DAG.getValueType(MVT::i32));
12214
12215 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12216
12217 // For an addition, the result should be less than one of the operands (LHS)
12218 // if and only if the other operand (RHS) is negative, otherwise there will
12219 // be overflow.
12220 // For a subtraction, the result should be less than one of the operands
12221 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12222 // otherwise there will be overflow.
12223 EVT OType = N->getValueType(1);
12224 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12225 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12226
12227 SDValue Overflow =
12228 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12229 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12230 Results.push_back(Overflow);
12231 return;
12232 }
12233 case ISD::UADDO:
12234 case ISD::USUBO: {
12235 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12236 "Unexpected custom legalisation");
12237 bool IsAdd = N->getOpcode() == ISD::UADDO;
12238 // Create an ADDW or SUBW.
12239 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12240 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12241 SDValue Res =
12242 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12243 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12244 DAG.getValueType(MVT::i32));
12245
12246 SDValue Overflow;
12247 if (IsAdd && isOneConstant(RHS)) {
12248 // Special case uaddo X, 1 overflowed if the addition result is 0.
12249 // The general case (X + C) < C is not necessarily beneficial. Although we
12250 // reduce the live range of X, we may introduce the materialization of
12251      // constant C, especially when the setcc result is used by a branch, since
12252      // RISC-V has no compare-with-constant branch instructions.
12253 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12254 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12255 } else if (IsAdd && isAllOnesConstant(RHS)) {
12256 // Special case uaddo X, -1 overflowed if X != 0.
12257 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12258 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12259 } else {
12260 // Sign extend the LHS and perform an unsigned compare with the ADDW
12261 // result. Since the inputs are sign extended from i32, this is equivalent
12262 // to comparing the lower 32 bits.
12263 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12264 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12265 IsAdd ? ISD::SETULT : ISD::SETUGT);
12266 }
12267
12268 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12269 Results.push_back(Overflow);
12270 return;
12271 }
12272 case ISD::UADDSAT:
12273 case ISD::USUBSAT: {
12274 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12275 "Unexpected custom legalisation");
12276 if (Subtarget.hasStdExtZbb()) {
12277 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12278 // sign extend allows overflow of the lower 32 bits to be detected on
12279 // the promoted size.
12280 SDValue LHS =
12281 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12282 SDValue RHS =
12283 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12284 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12285 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12286 return;
12287 }
12288
12289 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12290 // promotion for UADDO/USUBO.
12291 Results.push_back(expandAddSubSat(N, DAG));
12292 return;
12293 }
12294 case ISD::SADDSAT:
12295 case ISD::SSUBSAT: {
12296 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12297 "Unexpected custom legalisation");
12298 Results.push_back(expandAddSubSat(N, DAG));
12299 return;
12300 }
12301 case ISD::ABS: {
12302 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12303 "Unexpected custom legalisation");
12304
12305 if (Subtarget.hasStdExtZbb()) {
12306 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12307 // This allows us to remember that the result is sign extended. Expanding
12308 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12309 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12310 N->getOperand(0));
12311 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12312 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12313 return;
12314 }
12315
12316 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12317 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12318
12319    // Freeze the source so we can increase its use count.
12320 Src = DAG.getFreeze(Src);
12321
12322 // Copy sign bit to all bits using the sraiw pattern.
12323 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12324 DAG.getValueType(MVT::i32));
12325 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12326 DAG.getConstant(31, DL, MVT::i64));
12327
12328 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12329 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12330
12331 // NOTE: The result is only required to be anyextended, but sext is
12332 // consistent with type legalization of sub.
12333 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12334 DAG.getValueType(MVT::i32));
12335 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12336 return;
12337 }
12338 case ISD::BITCAST: {
12339 EVT VT = N->getValueType(0);
12340 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12341 SDValue Op0 = N->getOperand(0);
12342 EVT Op0VT = Op0.getValueType();
12343 MVT XLenVT = Subtarget.getXLenVT();
12344 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12345 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12346 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12347 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12348 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12349 Subtarget.hasStdExtZfbfmin()) {
12350 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12351 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12352 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12353 Subtarget.hasStdExtFOrZfinx()) {
12354 SDValue FPConv =
12355 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12356 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12357 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12358 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12359 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12360 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12361 NewReg.getValue(0), NewReg.getValue(1));
12362 Results.push_back(RetReg);
12363 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12364 isTypeLegal(Op0VT)) {
12365 // Custom-legalize bitcasts from fixed-length vector types to illegal
12366 // scalar types in order to improve codegen. Bitcast the vector to a
12367 // one-element vector type whose element type is the same as the result
12368 // type, and extract the first element.
12369 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12370 if (isTypeLegal(BVT)) {
12371 SDValue BVec = DAG.getBitcast(BVT, Op0);
12372 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12373 DAG.getVectorIdxConstant(0, DL)));
12374 }
12375 }
12376 break;
12377 }
12378 case RISCVISD::BREV8: {
12379 MVT VT = N->getSimpleValueType(0);
12380 MVT XLenVT = Subtarget.getXLenVT();
12381 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12382 "Unexpected custom legalisation");
12383 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12384 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12385 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12386 // ReplaceNodeResults requires we maintain the same type for the return
12387 // value.
12388 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12389 break;
12390 }
12391  case ISD::EXTRACT_VECTOR_ELT: {
12392    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12393 // type is illegal (currently only vXi64 RV32).
12394 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12395 // transferred to the destination register. We issue two of these from the
12396 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12397 // first element.
12398 SDValue Vec = N->getOperand(0);
12399 SDValue Idx = N->getOperand(1);
12400
12401 // The vector type hasn't been legalized yet so we can't issue target
12402 // specific nodes if it needs legalization.
12403 // FIXME: We would manually legalize if it's important.
12404 if (!isTypeLegal(Vec.getValueType()))
12405 return;
12406
12407 MVT VecVT = Vec.getSimpleValueType();
12408
12409 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12410 VecVT.getVectorElementType() == MVT::i64 &&
12411 "Unexpected EXTRACT_VECTOR_ELT legalization");
12412
12413 // If this is a fixed vector, we need to convert it to a scalable vector.
12414 MVT ContainerVT = VecVT;
12415 if (VecVT.isFixedLengthVector()) {
12416 ContainerVT = getContainerForFixedLengthVector(VecVT);
12417 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12418 }
12419
12420 MVT XLenVT = Subtarget.getXLenVT();
12421
12422 // Use a VL of 1 to avoid processing more elements than we need.
12423 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12424
12425 // Unless the index is known to be 0, we must slide the vector down to get
12426 // the desired element into index 0.
12427 if (!isNullConstant(Idx)) {
12428 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12429 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12430 }
12431
12432 // Extract the lower XLEN bits of the correct vector element.
12433 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12434
12435 // To extract the upper XLEN bits of the vector element, shift the first
12436 // element right by 32 bits and re-extract the lower XLEN bits.
12437 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12438 DAG.getUNDEF(ContainerVT),
12439 DAG.getConstant(32, DL, XLenVT), VL);
12440 SDValue LShr32 =
12441 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12442 DAG.getUNDEF(ContainerVT), Mask, VL);
12443
12444 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12445
12446 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12447 break;
12448 }
12449  case ISD::INTRINSIC_WO_CHAIN: {
12450    unsigned IntNo = N->getConstantOperandVal(0);
12451 switch (IntNo) {
12452 default:
12453      llvm_unreachable(
12454          "Don't know how to custom type legalize this intrinsic!");
12455 case Intrinsic::experimental_get_vector_length: {
12456 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12457 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12458 return;
12459 }
12460 case Intrinsic::experimental_cttz_elts: {
12461 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12462 Results.push_back(
12463 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12464 return;
12465 }
12466 case Intrinsic::riscv_orc_b:
12467 case Intrinsic::riscv_brev8:
12468 case Intrinsic::riscv_sha256sig0:
12469 case Intrinsic::riscv_sha256sig1:
12470 case Intrinsic::riscv_sha256sum0:
12471 case Intrinsic::riscv_sha256sum1:
12472 case Intrinsic::riscv_sm3p0:
12473 case Intrinsic::riscv_sm3p1: {
12474 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12475 return;
12476 unsigned Opc;
12477 switch (IntNo) {
12478 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12479 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12480 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12481 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12482 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12483 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12484 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12485 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12486 }
12487
12488 SDValue NewOp =
12489 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12490 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12491 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12492 return;
12493 }
12494 case Intrinsic::riscv_sm4ks:
12495 case Intrinsic::riscv_sm4ed: {
12496 unsigned Opc =
12497 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12498 SDValue NewOp0 =
12499 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12500 SDValue NewOp1 =
12501 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12502 SDValue Res =
12503 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12504 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12505 return;
12506 }
12507 case Intrinsic::riscv_mopr: {
12508 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12509 return;
12510 SDValue NewOp =
12511 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12512 SDValue Res = DAG.getNode(
12513 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12514 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12515 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12516 return;
12517 }
12518 case Intrinsic::riscv_moprr: {
12519 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12520 return;
12521 SDValue NewOp0 =
12522 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12523 SDValue NewOp1 =
12524 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12525 SDValue Res = DAG.getNode(
12526 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12527 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12528 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12529 return;
12530 }
12531 case Intrinsic::riscv_clmul: {
12532 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12533 return;
12534
12535 SDValue NewOp0 =
12536 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12537 SDValue NewOp1 =
12538 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12539 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12540 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12541 return;
12542 }
12543 case Intrinsic::riscv_clmulh:
12544 case Intrinsic::riscv_clmulr: {
12545 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12546 return;
12547
12548 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12549 // to the full 128-bit clmul result of multiplying two xlen values.
12550 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12551 // upper 32 bits.
12552 //
12553 // The alternative is to mask the inputs to 32 bits and use clmul, but
12554 // that requires two shifts to mask each input without zext.w.
12555 // FIXME: If the inputs are known zero extended or could be freely
12556 // zero extended, the mask form would be better.
12557 SDValue NewOp0 =
12558 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12559 SDValue NewOp1 =
12560 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12561 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12562 DAG.getConstant(32, DL, MVT::i64));
12563 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12564 DAG.getConstant(32, DL, MVT::i64));
12565 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
 12566 : RISCVISD::CLMULR;
 12567 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12568 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12569 DAG.getConstant(32, DL, MVT::i64));
12570 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12571 return;
12572 }
12573 case Intrinsic::riscv_vmv_x_s: {
12574 EVT VT = N->getValueType(0);
12575 MVT XLenVT = Subtarget.getXLenVT();
12576 if (VT.bitsLT(XLenVT)) {
12577 // Simple case just extract using vmv.x.s and truncate.
12578 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12579 Subtarget.getXLenVT(), N->getOperand(1));
12580 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12581 return;
12582 }
12583
12584 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12585 "Unexpected custom legalization");
12586
12587 // We need to do the move in two steps.
12588 SDValue Vec = N->getOperand(1);
12589 MVT VecVT = Vec.getSimpleValueType();
12590
12591 // First extract the lower XLEN bits of the element.
12592 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12593
12594 // To extract the upper XLEN bits of the vector element, shift the first
12595 // element right by 32 bits and re-extract the lower XLEN bits.
12596 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12597
12598 SDValue ThirtyTwoV =
12599 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12600 DAG.getConstant(32, DL, XLenVT), VL);
12601 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12602 DAG.getUNDEF(VecVT), Mask, VL);
12603 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12604
12605 Results.push_back(
12606 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12607 break;
12608 }
12609 }
12610 break;
12611 }
12612 case ISD::VECREDUCE_ADD:
12613 case ISD::VECREDUCE_AND:
12614 case ISD::VECREDUCE_OR:
12615 case ISD::VECREDUCE_XOR:
 12616 case ISD::VECREDUCE_SMAX:
 12617 case ISD::VECREDUCE_UMAX:
 12618 case ISD::VECREDUCE_SMIN:
 12619 case ISD::VECREDUCE_UMIN:
 12620 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12621 Results.push_back(V);
12622 break;
12623 case ISD::VP_REDUCE_ADD:
12624 case ISD::VP_REDUCE_AND:
12625 case ISD::VP_REDUCE_OR:
12626 case ISD::VP_REDUCE_XOR:
12627 case ISD::VP_REDUCE_SMAX:
12628 case ISD::VP_REDUCE_UMAX:
12629 case ISD::VP_REDUCE_SMIN:
12630 case ISD::VP_REDUCE_UMIN:
12631 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12632 Results.push_back(V);
12633 break;
12634 case ISD::GET_ROUNDING: {
12635 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12636 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12637 Results.push_back(Res.getValue(0));
12638 Results.push_back(Res.getValue(1));
12639 break;
12640 }
12641 }
12642}
12643
12644/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12645/// which corresponds to it.
12646static unsigned getVecReduceOpcode(unsigned Opc) {
12647 switch (Opc) {
12648 default:
 12649 llvm_unreachable("Unhandled binary to transform reduction");
12650 case ISD::ADD:
12651 return ISD::VECREDUCE_ADD;
12652 case ISD::UMAX:
12653 return ISD::VECREDUCE_UMAX;
12654 case ISD::SMAX:
12655 return ISD::VECREDUCE_SMAX;
12656 case ISD::UMIN:
12657 return ISD::VECREDUCE_UMIN;
12658 case ISD::SMIN:
12659 return ISD::VECREDUCE_SMIN;
12660 case ISD::AND:
12661 return ISD::VECREDUCE_AND;
12662 case ISD::OR:
12663 return ISD::VECREDUCE_OR;
12664 case ISD::XOR:
12665 return ISD::VECREDUCE_XOR;
12666 case ISD::FADD:
12667 // Note: This is the associative form of the generic reduction opcode.
12668 return ISD::VECREDUCE_FADD;
12669 }
12670}
12671
12672/// Perform two related transforms whose purpose is to incrementally recognize
12673/// an explode_vector followed by scalar reduction as a vector reduction node.
12674/// This exists to recover from a deficiency in SLP which can't handle
12675/// forests with multiple roots sharing common nodes. In some cases, one
12676/// of the trees will be vectorized, and the other will remain (unprofitably)
12677/// scalarized.
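// Illustrative sketch (hypothetical IR, not from the original source): with a
// <4 x i32> %v, SLP may leave behind
//   %e0 = extractelement <4 x i32> %v, i32 0
//   %e1 = extractelement <4 x i32> %v, i32 1
//   %r  = add i32 %e0, %e1
// The first transform below rewrites %r as a VECREDUCE_ADD of a two-element
// subvector of %v; the second transform then grows an existing reduction by
// one extracted element at a time until it covers the whole explode_vector.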
12678static SDValue
 12679 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
 12680 const RISCVSubtarget &Subtarget) {
12681
 12682 // This transform needs to run before all integer types have been legalized
12683 // to i64 (so that the vector element type matches the add type), and while
12684 // it's safe to introduce odd sized vector types.
 12685 if (DAG.NewNodesMustHaveLegalTypes)
 12686 return SDValue();
12687
12688 // Without V, this transform isn't useful. We could form the (illegal)
12689 // operations and let them be scalarized again, but there's really no point.
12690 if (!Subtarget.hasVInstructions())
12691 return SDValue();
12692
12693 const SDLoc DL(N);
12694 const EVT VT = N->getValueType(0);
12695 const unsigned Opc = N->getOpcode();
12696
12697 // For FADD, we only handle the case with reassociation allowed. We
12698 // could handle strict reduction order, but at the moment, there's no
12699 // known reason to, and the complexity isn't worth it.
12700 // TODO: Handle fminnum and fmaxnum here
12701 if (!VT.isInteger() &&
12702 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12703 return SDValue();
12704
12705 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12706 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12707 "Inconsistent mappings");
12708 SDValue LHS = N->getOperand(0);
12709 SDValue RHS = N->getOperand(1);
12710
12711 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12712 return SDValue();
12713
12714 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12715 std::swap(LHS, RHS);
12716
12717 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12718 !isa<ConstantSDNode>(RHS.getOperand(1)))
12719 return SDValue();
12720
12721 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12722 SDValue SrcVec = RHS.getOperand(0);
12723 EVT SrcVecVT = SrcVec.getValueType();
12724 assert(SrcVecVT.getVectorElementType() == VT);
12725 if (SrcVecVT.isScalableVector())
12726 return SDValue();
12727
12728 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12729 return SDValue();
12730
12731 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12732 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12733 // root of our reduction tree. TODO: We could extend this to any two
12734 // adjacent aligned constant indices if desired.
12735 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12736 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12737 uint64_t LHSIdx =
12738 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12739 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12740 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12741 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12742 DAG.getVectorIdxConstant(0, DL));
12743 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12744 }
12745 }
12746
12747 // Match (binop (reduce (extract_subvector V, 0),
12748 // (extract_vector_elt V, sizeof(SubVec))))
12749 // into a reduction of one more element from the original vector V.
12750 if (LHS.getOpcode() != ReduceOpc)
12751 return SDValue();
12752
12753 SDValue ReduceVec = LHS.getOperand(0);
12754 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12755 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12756 isNullConstant(ReduceVec.getOperand(1)) &&
12757 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12758 // For illegal types (e.g. 3xi32), most will be combined again into a
12759 // wider (hopefully legal) type. If this is a terminal state, we are
12760 // relying on type legalization here to produce something reasonable
12761 // and this lowering quality could probably be improved. (TODO)
12762 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12763 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12764 DAG.getVectorIdxConstant(0, DL));
12765 auto Flags = ReduceVec->getFlags();
12766 Flags.intersectWith(N->getFlags());
12767 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12768 }
12769
12770 return SDValue();
12771}
12772
12773
12774// Try to fold (<bop> x, (reduction.<bop> vec, start))
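// Illustrative example (hypothetical operands): for
//   (add x, (extractelt (vecreduce_add_vl vec, ..., (splat 0), ...), 0))
// the scalar x can be folded into the reduction's start value, replacing the
// neutral-element splat with x, provided the splat and the reduction have
// known non-zero AVLs as checked below.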
 12775static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
 12776 const RISCVSubtarget &Subtarget) {
12777 auto BinOpToRVVReduce = [](unsigned Opc) {
12778 switch (Opc) {
12779 default:
 12780 llvm_unreachable("Unhandled binary to transform reduction");
 12781 case ISD::ADD:
 12782 return RISCVISD::VECREDUCE_ADD_VL;
 12783 case ISD::UMAX:
 12784 return RISCVISD::VECREDUCE_UMAX_VL;
 12785 case ISD::SMAX:
 12786 return RISCVISD::VECREDUCE_SMAX_VL;
 12787 case ISD::UMIN:
 12788 return RISCVISD::VECREDUCE_UMIN_VL;
 12789 case ISD::SMIN:
 12790 return RISCVISD::VECREDUCE_SMIN_VL;
 12791 case ISD::AND:
 12792 return RISCVISD::VECREDUCE_AND_VL;
 12793 case ISD::OR:
 12794 return RISCVISD::VECREDUCE_OR_VL;
 12795 case ISD::XOR:
 12796 return RISCVISD::VECREDUCE_XOR_VL;
 12797 case ISD::FADD:
 12798 return RISCVISD::VFREDUSUM_VL;
 12799 case ISD::FMAXNUM:
 12800 return RISCVISD::VFREDMAX_VL;
 12801 case ISD::FMINNUM:
 12802 return RISCVISD::VFREDMIN_VL;
 12803 }
12804 };
12805
12806 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12807 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12808 isNullConstant(V.getOperand(1)) &&
12809 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12810 };
12811
12812 unsigned Opc = N->getOpcode();
12813 unsigned ReduceIdx;
12814 if (IsReduction(N->getOperand(0), Opc))
12815 ReduceIdx = 0;
12816 else if (IsReduction(N->getOperand(1), Opc))
12817 ReduceIdx = 1;
12818 else
12819 return SDValue();
12820
 12821 // Skip if this is an FADD that disallows reassociation, since the combine requires it.
12822 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12823 return SDValue();
12824
12825 SDValue Extract = N->getOperand(ReduceIdx);
12826 SDValue Reduce = Extract.getOperand(0);
12827 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12828 return SDValue();
12829
12830 SDValue ScalarV = Reduce.getOperand(2);
12831 EVT ScalarVT = ScalarV.getValueType();
12832 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12833 ScalarV.getOperand(0)->isUndef() &&
12834 isNullConstant(ScalarV.getOperand(2)))
12835 ScalarV = ScalarV.getOperand(1);
12836
12837 // Make sure that ScalarV is a splat with VL=1.
12838 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12839 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12840 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12841 return SDValue();
12842
12843 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12844 return SDValue();
12845
12846 // Check the scalar of ScalarV is neutral element
12847 // TODO: Deal with value other than neutral element.
12848 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12849 0))
12850 return SDValue();
12851
12852 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12853 // FIXME: We might be able to improve this if operand 0 is undef.
12854 if (!isNonZeroAVL(Reduce.getOperand(5)))
12855 return SDValue();
12856
12857 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12858
12859 SDLoc DL(N);
12860 SDValue NewScalarV =
12861 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12862 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12863
12864 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12865 if (ScalarVT != ScalarV.getValueType())
12866 NewScalarV =
12867 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12868 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12869
12870 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12871 NewScalarV, Reduce.getOperand(3),
12872 Reduce.getOperand(4), Reduce.getOperand(5)};
12873 SDValue NewReduce =
12874 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12875 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12876 Extract.getOperand(1));
12877}
12878
12879// Optimize (add (shl x, c0), (shl y, c1)) ->
12880// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
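// Illustrative example (hypothetical operands): with c0 = 2 and c1 = 3,
//   (add (shl x, 2), (shl y, 3)) -> (shl (add (shl y, 1), x), 2)
// i.e. SLLI (SH1ADD y, x), 2, since ((y << 1) + x) << 2 == (x << 2) + (y << 3).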
 12881static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
 12882 const RISCVSubtarget &Subtarget) {
12883 // Perform this optimization only in the zba extension.
12884 if (!Subtarget.hasStdExtZba())
12885 return SDValue();
12886
12887 // Skip for vector types and larger types.
12888 EVT VT = N->getValueType(0);
12889 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12890 return SDValue();
12891
12892 // The two operand nodes must be SHL and have no other use.
12893 SDValue N0 = N->getOperand(0);
12894 SDValue N1 = N->getOperand(1);
12895 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12896 !N0->hasOneUse() || !N1->hasOneUse())
12897 return SDValue();
12898
12899 // Check c0 and c1.
12900 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12901 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12902 if (!N0C || !N1C)
12903 return SDValue();
12904 int64_t C0 = N0C->getSExtValue();
12905 int64_t C1 = N1C->getSExtValue();
12906 if (C0 <= 0 || C1 <= 0)
12907 return SDValue();
12908
12909 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12910 int64_t Bits = std::min(C0, C1);
12911 int64_t Diff = std::abs(C0 - C1);
12912 if (Diff != 1 && Diff != 2 && Diff != 3)
12913 return SDValue();
12914
12915 // Build nodes.
12916 SDLoc DL(N);
12917 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12918 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12919 SDValue NA0 =
12920 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12921 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12922 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12923}
12924
12925// Combine a constant select operand into its use:
12926//
12927// (and (select cond, -1, c), x)
12928// -> (select cond, x, (and x, c)) [AllOnes=1]
12929// (or (select cond, 0, c), x)
12930// -> (select cond, x, (or x, c)) [AllOnes=0]
12931// (xor (select cond, 0, c), x)
12932// -> (select cond, x, (xor x, c)) [AllOnes=0]
12933// (add (select cond, 0, c), x)
12934// -> (select cond, x, (add x, c)) [AllOnes=0]
12935// (sub x, (select cond, 0, c))
12936// -> (select cond, x, (sub x, c)) [AllOnes=0]
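// Illustrative instance (hypothetical constants): (and (select cond, -1, 255), x)
// becomes (select cond, x, (and x, 255)); when cond is true the AND with -1 is
// a no-op, so only the false arm still needs the AND.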
 12937static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
 12938 SelectionDAG &DAG, bool AllOnes,
12939 const RISCVSubtarget &Subtarget) {
12940 EVT VT = N->getValueType(0);
12941
12942 // Skip vectors.
12943 if (VT.isVector())
12944 return SDValue();
12945
12946 if (!Subtarget.hasConditionalMoveFusion()) {
12947 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12948 if ((!Subtarget.hasStdExtZicond() &&
12949 !Subtarget.hasVendorXVentanaCondOps()) ||
12950 N->getOpcode() != ISD::AND)
12951 return SDValue();
12952
12953 // Maybe harmful when condition code has multiple use.
12954 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12955 return SDValue();
12956
12957 // Maybe harmful when VT is wider than XLen.
12958 if (VT.getSizeInBits() > Subtarget.getXLen())
12959 return SDValue();
12960 }
12961
12962 if ((Slct.getOpcode() != ISD::SELECT &&
12963 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12964 !Slct.hasOneUse())
12965 return SDValue();
12966
12967 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
 12968 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
 12969 };
12970
12971 bool SwapSelectOps;
12972 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12973 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12974 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12975 SDValue NonConstantVal;
12976 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12977 SwapSelectOps = false;
12978 NonConstantVal = FalseVal;
12979 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12980 SwapSelectOps = true;
12981 NonConstantVal = TrueVal;
12982 } else
12983 return SDValue();
12984
 12985 // Slct is now known to be the desired identity constant when CC is true.
12986 TrueVal = OtherOp;
12987 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12988 // Unless SwapSelectOps says the condition should be false.
12989 if (SwapSelectOps)
12990 std::swap(TrueVal, FalseVal);
12991
12992 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12993 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12994 {Slct.getOperand(0), Slct.getOperand(1),
12995 Slct.getOperand(2), TrueVal, FalseVal});
12996
12997 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12998 {Slct.getOperand(0), TrueVal, FalseVal});
12999}
13000
13001// Attempt combineSelectAndUse on each operand of a commutative operator N.
 13002static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
 13003 bool AllOnes,
13004 const RISCVSubtarget &Subtarget) {
13005 SDValue N0 = N->getOperand(0);
13006 SDValue N1 = N->getOperand(1);
13007 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13008 return Result;
13009 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13010 return Result;
13011 return SDValue();
13012}
13013
13014// Transform (add (mul x, c0), c1) ->
13015// (add (mul (add x, c1/c0), c0), c1%c0).
13016// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13017// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13018// to an infinite loop in DAGCombine if transformed.
13019// Or transform (add (mul x, c0), c1) ->
13020// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13021// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13022// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13023// lead to an infinite loop in DAGCombine if transformed.
13024// Or transform (add (mul x, c0), c1) ->
13025// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13026// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13027// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13028// lead to an infinite loop in DAGCombine if transformed.
13029// Or transform (add (mul x, c0), c1) ->
13030// (mul (add x, c1/c0), c0).
13031// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
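// Illustrative example (hypothetical constants): c0 = 100, c1 = 4099. c1 is
// not simm12, but c1/c0 = 40 and c1%c0 = 99 are, and c0*(c1/c0) = 4000 is not
// simm12, so (add (mul x, 100), 4099) becomes
//   (add (mul (add x, 40), 100), 99)
// since (x + 40) * 100 + 99 == 100 * x + 4099.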
 13032static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
 13033 const RISCVSubtarget &Subtarget) {
13034 // Skip for vector types and larger types.
13035 EVT VT = N->getValueType(0);
13036 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13037 return SDValue();
13038 // The first operand node must be a MUL and has no other use.
13039 SDValue N0 = N->getOperand(0);
13040 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13041 return SDValue();
13042 // Check if c0 and c1 match above conditions.
13043 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13044 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13045 if (!N0C || !N1C)
13046 return SDValue();
13047 // If N0C has multiple uses it's possible one of the cases in
13048 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13049 // in an infinite loop.
13050 if (!N0C->hasOneUse())
13051 return SDValue();
13052 int64_t C0 = N0C->getSExtValue();
13053 int64_t C1 = N1C->getSExtValue();
13054 int64_t CA, CB;
13055 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13056 return SDValue();
13057 // Search for proper CA (non-zero) and CB that both are simm12.
13058 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13059 !isInt<12>(C0 * (C1 / C0))) {
13060 CA = C1 / C0;
13061 CB = C1 % C0;
13062 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13063 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13064 CA = C1 / C0 + 1;
13065 CB = C1 % C0 - C0;
13066 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13067 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13068 CA = C1 / C0 - 1;
13069 CB = C1 % C0 + C0;
13070 } else
13071 return SDValue();
13072 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13073 SDLoc DL(N);
13074 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13075 DAG.getConstant(CA, DL, VT));
13076 SDValue New1 =
13077 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13078 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13079}
13080
13081// add (zext, zext) -> zext (add (zext, zext))
13082// sub (zext, zext) -> sext (sub (zext, zext))
13083// mul (zext, zext) -> zext (mul (zext, zext))
13084// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13085// udiv (zext, zext) -> zext (udiv (zext, zext))
13086// srem (zext, zext) -> zext (srem (zext, zext))
13087// urem (zext, zext) -> zext (urem (zext, zext))
13088//
 13089// where the sum of the extend widths match, and the range of the bin op
13090// fits inside the width of the narrower bin op. (For profitability on rvv, we
13091// use a power of two for both inner and outer extend.)
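// Illustrative example (hypothetical types): with nxv2i8 sources zero extended
// all the way to nxv2i32,
//   (add (zext nxv2i8 a to nxv2i32), (zext nxv2i8 b to nxv2i32))
// becomes (zext (add (zext a to nxv2i16), (zext b to nxv2i16)) to nxv2i32):
// an 8-bit plus 8-bit sum always fits in 16 bits, and the narrower add is
// cheaper at high LMUL.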
 13092static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
 13093
13094 EVT VT = N->getValueType(0);
13095 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13096 return SDValue();
13097
13098 SDValue N0 = N->getOperand(0);
13099 SDValue N1 = N->getOperand(1);
 13100 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
 13101 return SDValue();
13102 if (!N0.hasOneUse() || !N1.hasOneUse())
13103 return SDValue();
13104
13105 SDValue Src0 = N0.getOperand(0);
13106 SDValue Src1 = N1.getOperand(0);
13107 EVT SrcVT = Src0.getValueType();
13108 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13109 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13110 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13111 return SDValue();
13112
13113 LLVMContext &C = *DAG.getContext();
 13114 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
 13115 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13116
13117 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13118 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13119
13120 // Src0 and Src1 are zero extended, so they're always positive if signed.
13121 //
13122 // sub can produce a negative from two positive operands, so it needs sign
13123 // extended. Other nodes produce a positive from two positive operands, so
13124 // zero extend instead.
13125 unsigned OuterExtend =
13126 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13127
13128 return DAG.getNode(
13129 OuterExtend, SDLoc(N), VT,
13130 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13131}
13132
13133// Try to turn (add (xor bool, 1) -1) into (neg bool).
 13134static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
 13135 SDValue N0 = N->getOperand(0);
13136 SDValue N1 = N->getOperand(1);
13137 EVT VT = N->getValueType(0);
13138 SDLoc DL(N);
13139
13140 // RHS should be -1.
13141 if (!isAllOnesConstant(N1))
13142 return SDValue();
13143
13144 // Look for (xor X, 1).
13145 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13146 return SDValue();
13147
13148 // First xor input should be 0 or 1.
 13149 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
 13150 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13151 return SDValue();
13152
13153 // Emit a negate of the setcc.
13154 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13155 N0.getOperand(0));
13156}
13157
 13158static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
 13159 const RISCVSubtarget &Subtarget) {
13160 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13161 return V;
13162 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13163 return V;
13164 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13165 return V;
13166 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13167 return V;
13168 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13169 return V;
13170 if (SDValue V = combineBinOpOfZExt(N, DAG))
13171 return V;
13172
13173 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13174 // (select lhs, rhs, cc, x, (add x, y))
13175 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13176}
13177
13178// Try to turn a sub boolean RHS and constant LHS into an addi.
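// Illustrative example (hypothetical constants):
//   (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4)
// Both sides evaluate to 4 when x == y and to 5 otherwise, and 4 fits in the
// simm12 of an ADDI.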
 13179static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
 13180 SDValue N0 = N->getOperand(0);
13181 SDValue N1 = N->getOperand(1);
13182 EVT VT = N->getValueType(0);
13183 SDLoc DL(N);
13184
13185 // Require a constant LHS.
13186 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13187 if (!N0C)
13188 return SDValue();
13189
13190 // All our optimizations involve subtracting 1 from the immediate and forming
13191 // an ADDI. Make sure the new immediate is valid for an ADDI.
13192 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13193 if (!ImmValMinus1.isSignedIntN(12))
13194 return SDValue();
13195
13196 SDValue NewLHS;
13197 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13198 // (sub constant, (setcc x, y, eq/neq)) ->
13199 // (add (setcc x, y, neq/eq), constant - 1)
13200 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13201 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13202 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13203 return SDValue();
13204 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13205 NewLHS =
13206 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13207 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13208 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13209 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13210 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13211 NewLHS = N1.getOperand(0);
13212 } else
13213 return SDValue();
13214
13215 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13216 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13217}
13218
 13219static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
 13220 const RISCVSubtarget &Subtarget) {
13221 if (SDValue V = combineSubOfBoolean(N, DAG))
13222 return V;
13223
13224 EVT VT = N->getValueType(0);
13225 SDValue N0 = N->getOperand(0);
13226 SDValue N1 = N->getOperand(1);
13227 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13228 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13229 isNullConstant(N1.getOperand(1))) {
13230 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13231 if (CCVal == ISD::SETLT) {
13232 SDLoc DL(N);
13233 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13234 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13235 DAG.getConstant(ShAmt, DL, VT));
13236 }
13237 }
13238
13239 if (SDValue V = combineBinOpOfZExt(N, DAG))
13240 return V;
13241
13242 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13243 // (select lhs, rhs, cc, x, (sub x, y))
13244 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13245}
13246
13247// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13248// Legalizing setcc can introduce xors like this. Doing this transform reduces
13249// the number of xors and may allow the xor to fold into a branch condition.
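// Illustrative example (assuming X and Y are known to be 0 or 1):
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)
// trading two XORIs for one.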
 13250static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
 13251 SDValue N0 = N->getOperand(0);
13252 SDValue N1 = N->getOperand(1);
13253 bool IsAnd = N->getOpcode() == ISD::AND;
13254
13255 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13256 return SDValue();
13257
13258 if (!N0.hasOneUse() || !N1.hasOneUse())
13259 return SDValue();
13260
13261 SDValue N01 = N0.getOperand(1);
13262 SDValue N11 = N1.getOperand(1);
13263
13264 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13265 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13266 // operation is And, allow one of the Xors to use -1.
13267 if (isOneConstant(N01)) {
13268 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13269 return SDValue();
13270 } else if (isOneConstant(N11)) {
13271 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13272 if (!(IsAnd && isAllOnesConstant(N01)))
13273 return SDValue();
13274 } else
13275 return SDValue();
13276
13277 EVT VT = N->getValueType(0);
13278
13279 SDValue N00 = N0.getOperand(0);
13280 SDValue N10 = N1.getOperand(0);
13281
13282 // The LHS of the xors needs to be 0/1.
 13283 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
 13284 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13285 return SDValue();
13286
13287 // Invert the opcode and insert a new xor.
13288 SDLoc DL(N);
13289 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13290 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13291 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13292}
13293
 13294static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
 13295 const RISCVSubtarget &Subtarget) {
13296 SDValue N0 = N->getOperand(0);
13297 EVT VT = N->getValueType(0);
13298
13299 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13300 // extending X. This is safe since we only need the LSB after the shift and
13301 // shift amounts larger than 31 would produce poison. If we wait until
13302 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13303 // to use a BEXT instruction.
13304 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13305 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13306 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13307 SDLoc DL(N0);
13308 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13309 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13310 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13311 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13312 }
13313
13314 return SDValue();
13315}
13316
 13317// Combines two comparison operations and a logic operation into one selection
 13318// operation (min, max) and logic operation. Returns the newly constructed node
 13319// if the conditions for the optimization are satisfied.
 13320static SDValue performANDCombine(SDNode *N,
 13321 TargetLowering::DAGCombinerInfo &DCI,
 13322 const RISCVSubtarget &Subtarget) {
13323 SelectionDAG &DAG = DCI.DAG;
13324
13325 SDValue N0 = N->getOperand(0);
13326 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13327 // extending X. This is safe since we only need the LSB after the shift and
13328 // shift amounts larger than 31 would produce poison. If we wait until
13329 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13330 // to use a BEXT instruction.
13331 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13332 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13333 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13334 N0.hasOneUse()) {
13335 SDLoc DL(N);
13336 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13337 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13338 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13339 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13340 DAG.getConstant(1, DL, MVT::i64));
13341 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13342 }
13343
13344 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13345 return V;
13346 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13347 return V;
13348
13349 if (DCI.isAfterLegalizeDAG())
13350 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13351 return V;
13352
13353 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13354 // (select lhs, rhs, cc, x, (and x, y))
13355 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13356}
13357
13358// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13359// FIXME: Generalize to other binary operators with same operand.
 13360static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
 13361 SelectionDAG &DAG) {
13362 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13363
13364 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
 13365 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
 13366 !N0.hasOneUse() || !N1.hasOneUse())
13367 return SDValue();
13368
13369 // Should have the same condition.
13370 SDValue Cond = N0.getOperand(1);
13371 if (Cond != N1.getOperand(1))
13372 return SDValue();
13373
13374 SDValue TrueV = N0.getOperand(0);
13375 SDValue FalseV = N1.getOperand(0);
13376
13377 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13378 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13379 !isOneConstant(TrueV.getOperand(1)) ||
13380 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13381 return SDValue();
13382
13383 EVT VT = N->getValueType(0);
13384 SDLoc DL(N);
13385
13386 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13387 Cond);
13388 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13389 Cond);
13390 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13391 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13392}
13393
 13394static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
 13395 const RISCVSubtarget &Subtarget) {
13396 SelectionDAG &DAG = DCI.DAG;
13397
13398 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13399 return V;
13400 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13401 return V;
13402
13403 if (DCI.isAfterLegalizeDAG())
13404 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13405 return V;
13406
13407 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13408 // We may be able to pull a common operation out of the true and false value.
13409 SDValue N0 = N->getOperand(0);
13410 SDValue N1 = N->getOperand(1);
13411 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13412 return V;
13413 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13414 return V;
13415
13416 // fold (or (select cond, 0, y), x) ->
13417 // (select cond, x, (or x, y))
13418 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13419}
13420
 13421static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
 13422 const RISCVSubtarget &Subtarget) {
13423 SDValue N0 = N->getOperand(0);
13424 SDValue N1 = N->getOperand(1);
13425
13426 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
 13427 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
 13428 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13429 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13430 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13431 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13432 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13433 SDLoc DL(N);
13434 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13435 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13436 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13437 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13438 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13439 }
13440
13441 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13442 // NOTE: Assumes ROL being legal means ROLW is legal.
13443 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13444 if (N0.getOpcode() == RISCVISD::SLLW &&
 13445 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
 13446 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13447 SDLoc DL(N);
13448 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13449 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13450 }
13451
13452 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13453 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13454 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13455 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13456 if (ConstN00 && CC == ISD::SETLT) {
13457 EVT VT = N0.getValueType();
13458 SDLoc DL(N0);
13459 const APInt &Imm = ConstN00->getAPIntValue();
13460 if ((Imm + 1).isSignedIntN(12))
13461 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13462 DAG.getConstant(Imm + 1, DL, VT), CC);
13463 }
13464 }
13465
13466 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13467 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13468 // would have been promoted to i32, but the setcc would have i64 result.
13469 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13470 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13471 SDValue N00 = N0.getOperand(0);
13472 SDLoc DL(N);
13473 SDValue LHS = N00.getOperand(0);
13474 SDValue RHS = N00.getOperand(1);
13475 SDValue CC = N00.getOperand(2);
13476 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13477 LHS.getValueType());
13478 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13479 LHS, RHS, NotCC);
13480 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13481 }
13482
13483 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13484 return V;
13485 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13486 return V;
13487
13488 // fold (xor (select cond, 0, y), x) ->
13489 // (select cond, x, (xor x, y))
13490 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13491}
13492
13493// Try to expand a scalar multiply to a faster sequence.
 13494static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
 13495 TargetLowering::DAGCombinerInfo &DCI,
 13496 const RISCVSubtarget &Subtarget) {
13497
13498 EVT VT = N->getValueType(0);
13499
13500 // LI + MUL is usually smaller than the alternative sequence.
 13501 if (DAG.getMachineFunction().getFunction().hasMinSize())
 13502 return SDValue();
13503
13504 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13505 return SDValue();
13506
13507 if (VT != Subtarget.getXLenVT())
13508 return SDValue();
13509
13510 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13511 return SDValue();
13512
13513 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13514 if (!CNode)
13515 return SDValue();
13516 uint64_t MulAmt = CNode->getZExtValue();
13517
13518 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13519 // We're adding additional uses of X here, and in principle, we should be freezing
13520 // X before doing so. However, adding freeze here causes real regressions, and no
13521 // other target properly freezes X in these cases either.
13522 SDValue X = N->getOperand(0);
13523
13524 for (uint64_t Divisor : {3, 5, 9}) {
13525 if (MulAmt % Divisor != 0)
13526 continue;
13527 uint64_t MulAmt2 = MulAmt / Divisor;
13528 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13529 // Matched in tablegen, avoid perturbing patterns.
13530 if (isPowerOf2_64(MulAmt2))
13531 return SDValue();
13532
13533 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
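 // Illustrative example (hypothetical constant): MulAmt = 45 = 5 * 9 gives
 //   Mul359 = sh2add X, X        ; (X << 2) + X == 5 * X
 //   result = sh3add Mul359, Mul359 ; (5X << 3) + 5X == 45 * X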
13534 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13535 SDLoc DL(N);
13536 SDValue Mul359 =
13537 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13538 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13539 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13540 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13541 Mul359);
13542 }
13543 }
13544
 13545 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
 13546 // shXadd. First check if this is a sum of two powers of 2 because that's
 13547 // easy. Then count how many trailing zeros there are up to the first set bit.
13548 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13549 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13550 if (ScaleShift >= 1 && ScaleShift < 4) {
13551 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13552 SDLoc DL(N);
13553 SDValue Shift1 =
13554 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13555 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13556 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13557 }
13558 }
13559
13560 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13561 // This is the two instruction form, there are also three instruction
13562 // variants we could implement. e.g.
13563 // (2^(1,2,3) * 3,5,9 + 1) << C2
13564 // 2^(C1>3) * 3,5,9 +/- 1
13565 for (uint64_t Divisor : {3, 5, 9}) {
13566 uint64_t C = MulAmt - 1;
13567 if (C <= Divisor)
13568 continue;
13569 unsigned TZ = llvm::countr_zero(C);
13570 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13571 SDLoc DL(N);
13572 SDValue Mul359 =
13573 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13574 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13575 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13576 DAG.getConstant(TZ, DL, VT), X);
13577 }
13578 }
13579
13580 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13581 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13582 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13583 if (ScaleShift >= 1 && ScaleShift < 4) {
13584 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13585 SDLoc DL(N);
13586 SDValue Shift1 =
13587 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13588 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13589 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13590 DAG.getConstant(ScaleShift, DL, VT), X));
13591 }
13592 }
13593
13594 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13595 for (uint64_t Offset : {3, 5, 9}) {
13596 if (isPowerOf2_64(MulAmt + Offset)) {
13597 SDLoc DL(N);
13598 SDValue Shift1 =
13599 DAG.getNode(ISD::SHL, DL, VT, X,
13600 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13601 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13602 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13603 X);
13604 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13605 }
13606 }
13607
13608 return SDValue();
13609}
13610
13611
 13612static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
 13613 TargetLowering::DAGCombinerInfo &DCI,
 13614 const RISCVSubtarget &Subtarget) {
13615 EVT VT = N->getValueType(0);
13616 if (!VT.isVector())
13617 return expandMul(N, DAG, DCI, Subtarget);
13618
13619 SDLoc DL(N);
13620 SDValue N0 = N->getOperand(0);
13621 SDValue N1 = N->getOperand(1);
13622 SDValue MulOper;
13623 unsigned AddSubOpc;
13624
13625 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13626 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13627 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13628 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13629 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13630 AddSubOpc = V->getOpcode();
13631 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13632 SDValue Opnd = V->getOperand(1);
13633 MulOper = V->getOperand(0);
13634 if (AddSubOpc == ISD::SUB)
13635 std::swap(Opnd, MulOper);
13636 if (isOneOrOneSplat(Opnd))
13637 return true;
13638 }
13639 return false;
13640 };
13641
13642 if (IsAddSubWith1(N0)) {
13643 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13644 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13645 }
13646
13647 if (IsAddSubWith1(N1)) {
13648 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13649 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13650 }
13651
13652 if (SDValue V = combineBinOpOfZExt(N, DAG))
13653 return V;
13654
13655 return SDValue();
13656}
13657
13658/// According to the property that indexed load/store instructions zero-extend
13659/// their indices, try to narrow the type of index operand.
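// Illustrative example (hypothetical types): an index expression such as
//   (shl (zext nxv2i8 %x to nxv2i64), (splat 2))
// needs at most 8 + 2 = 10 significant bits, so it can be rebuilt as
//   (shl (zext nxv2i8 %x to nxv2i16), (splat 2))
// and the indexed load/store zero-extends the narrower index for free.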
13660static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13661 if (isIndexTypeSigned(IndexType))
13662 return false;
13663
13664 if (!N->hasOneUse())
13665 return false;
13666
13667 EVT VT = N.getValueType();
13668 SDLoc DL(N);
13669
13670 // In general, what we're doing here is seeing if we can sink a truncate to
13671 // a smaller element type into the expression tree building our index.
13672 // TODO: We can generalize this and handle a bunch more cases if useful.
13673
13674 // Narrow a buildvector to the narrowest element type. This requires less
13675 // work and less register pressure at high LMUL, and creates smaller constants
13676 // which may be cheaper to materialize.
13677 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13678 KnownBits Known = DAG.computeKnownBits(N);
13679 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13680 LLVMContext &C = *DAG.getContext();
13681 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13682 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13683 N = DAG.getNode(ISD::TRUNCATE, DL,
13684 VT.changeVectorElementType(ResultVT), N);
13685 return true;
13686 }
13687 }
13688
13689 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13690 if (N.getOpcode() != ISD::SHL)
13691 return false;
13692
13693 SDValue N0 = N.getOperand(0);
13694 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
 13695 N0.getOpcode() != RISCVISD::VZEXT_VL)
 13696 return false;
13697 if (!N0->hasOneUse())
13698 return false;
13699
13700 APInt ShAmt;
13701 SDValue N1 = N.getOperand(1);
13702 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13703 return false;
13704
13705 SDValue Src = N0.getOperand(0);
13706 EVT SrcVT = Src.getValueType();
13707 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13708 unsigned ShAmtV = ShAmt.getZExtValue();
13709 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13710 NewElen = std::max(NewElen, 8U);
13711
13712 // Skip if NewElen is not narrower than the original extended type.
13713 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13714 return false;
13715
13716 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13717 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13718
13719 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13720 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13721 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13722 return true;
13723}
13724
13725// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13726// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13727// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13728// can become a sext.w instead of a shift pair.
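// Illustrative example (hypothetical constant): on RV64,
//   (seteq (and X, 0xffffffff), 0x80000000)
// becomes (seteq (sext_inreg X, i32), 0xffffffff80000000); the sext_inreg is a
// single sext.w and the sign-extended constant may be cheaper to materialize.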
 13729static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
 13730 const RISCVSubtarget &Subtarget) {
13731 SDValue N0 = N->getOperand(0);
13732 SDValue N1 = N->getOperand(1);
13733 EVT VT = N->getValueType(0);
13734 EVT OpVT = N0.getValueType();
13735
13736 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13737 return SDValue();
13738
13739 // RHS needs to be a constant.
13740 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13741 if (!N1C)
13742 return SDValue();
13743
13744 // LHS needs to be (and X, 0xffffffff).
13745 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13746 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13747 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13748 return SDValue();
13749
13750 // Looking for an equality compare.
13751 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13752 if (!isIntEqualitySetCC(Cond))
13753 return SDValue();
13754
13755 // Don't do this if the sign bit is provably zero, it will be turned back into
13756 // an AND.
13757 APInt SignMask = APInt::getOneBitSet(64, 31);
13758 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13759 return SDValue();
13760
13761 const APInt &C1 = N1C->getAPIntValue();
13762
13763 SDLoc dl(N);
13764 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13765 // to be equal.
13766 if (C1.getActiveBits() > 32)
13767 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13768
13769 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13770 N0.getOperand(0), DAG.getValueType(MVT::i32));
13771 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13772 dl, OpVT), Cond);
13773}
13774
13775static SDValue
 13776performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
 13777 const RISCVSubtarget &Subtarget) {
13778 SDValue Src = N->getOperand(0);
13779 EVT VT = N->getValueType(0);
13780
13781 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13782 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13783 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13784 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13785 Src.getOperand(0));
13786
13787 return SDValue();
13788}
13789
13790namespace {
13791// Forward declaration of the structure holding the necessary information to
13792// apply a combine.
13793struct CombineResult;
13794
13795enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13796/// Helper class for folding sign/zero extensions.
13797/// In particular, this class is used for the following combines:
13798/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13799/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13800/// mul | mul_vl -> vwmul(u) | vwmul_su
13801/// shl | shl_vl -> vwsll
13802/// fadd -> vfwadd | vfwadd_w
13803/// fsub -> vfwsub | vfwsub_w
13804/// fmul -> vfwmul
13805/// An object of this class represents an operand of the operation we want to
13806/// combine.
13807/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13808/// NodeExtensionHelper for `a` and one for `b`.
13809///
13810/// This class abstracts away how the extension is materialized and
 13811/// how its number of users affects the combines.
13812///
13813/// In particular:
13814/// - VWADD_W is conceptually == add(op0, sext(op1))
13815/// - VWADDU_W == add(op0, zext(op1))
13816/// - VWSUB_W == sub(op0, sext(op1))
13817/// - VWSUBU_W == sub(op0, zext(op1))
13818/// - VFWADD_W == fadd(op0, fpext(op1))
13819/// - VFWSUB_W == fsub(op0, fpext(op1))
13820/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13821/// zext|sext(smaller_value).
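// Illustrative example (hypothetical operands): when visiting
//   (RISCVISD::ADD_VL (VSEXT_VL a), (VSEXT_VL b), passthru, mask, vl)
// one NodeExtensionHelper wraps each operand, reports SupportsSExt, and the
// combines below can rebuild the node as (VWADD_VL a, b, ...) on the narrower
// element type.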
13822struct NodeExtensionHelper {
13823 /// Records if this operand is like being zero extended.
13824 bool SupportsZExt;
13825 /// Records if this operand is like being sign extended.
13826 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13827 /// instance, a splat constant (e.g., 3), would support being both sign and
13828 /// zero extended.
13829 bool SupportsSExt;
13830 /// Records if this operand is like being floating-Point extended.
13831 bool SupportsFPExt;
13832 /// This boolean captures whether we care if this operand would still be
13833 /// around after the folding happens.
13834 bool EnforceOneUse;
13835 /// Original value that this NodeExtensionHelper represents.
13836 SDValue OrigOperand;
13837
13838 /// Get the value feeding the extension or the value itself.
13839 /// E.g., for zext(a), this would return a.
13840 SDValue getSource() const {
13841 switch (OrigOperand.getOpcode()) {
13842 case ISD::ZERO_EXTEND:
13843 case ISD::SIGN_EXTEND:
13844 case RISCVISD::VSEXT_VL:
13845 case RISCVISD::VZEXT_VL:
 13846 case RISCVISD::FP_EXTEND_VL:
 13847 return OrigOperand.getOperand(0);
13848 default:
13849 return OrigOperand;
13850 }
13851 }
13852
13853 /// Check if this instance represents a splat.
13854 bool isSplat() const {
13855 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13856 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13857 }
13858
13859 /// Get the extended opcode.
13860 unsigned getExtOpc(ExtKind SupportsExt) const {
13861 switch (SupportsExt) {
13862 case ExtKind::SExt:
13863 return RISCVISD::VSEXT_VL;
13864 case ExtKind::ZExt:
13865 return RISCVISD::VZEXT_VL;
13866 case ExtKind::FPExt:
 13867 return RISCVISD::FP_EXTEND_VL;
 13868 }
13869 llvm_unreachable("Unknown ExtKind enum");
13870 }
13871
13872 /// Get or create a value that can feed \p Root with the given extension \p
 13874 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13874 /// operand. \see ::getSource().
13875 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13876 const RISCVSubtarget &Subtarget,
13877 std::optional<ExtKind> SupportsExt) const {
13878 if (!SupportsExt.has_value())
13879 return OrigOperand;
13880
13881 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13882
13883 SDValue Source = getSource();
13884 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13885 if (Source.getValueType() == NarrowVT)
13886 return Source;
13887
13888 unsigned ExtOpc = getExtOpc(*SupportsExt);
13889
13890 // If we need an extension, we should be changing the type.
13891 SDLoc DL(OrigOperand);
13892 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13893 switch (OrigOperand.getOpcode()) {
13894 case ISD::ZERO_EXTEND:
13895 case ISD::SIGN_EXTEND:
13896 case RISCVISD::VSEXT_VL:
13897 case RISCVISD::VZEXT_VL:
 13898 case RISCVISD::FP_EXTEND_VL:
 13899 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13900 case ISD::SPLAT_VECTOR:
13901 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
 13902 case RISCVISD::VMV_V_X_VL:
 13903 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13904 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13905 default:
13906 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13907 // and that operand should already have the right NarrowVT so no
13908 // extension should be required at this point.
13909 llvm_unreachable("Unsupported opcode");
13910 }
13911 }
13912
13913 /// Helper function to get the narrow type for \p Root.
13914 /// The narrow type is the type of \p Root where we divided the size of each
13915 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13916 /// \pre Both the narrow type and the original type should be legal.
13917 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13918 MVT VT = Root->getSimpleValueType(0);
13919
13920 // Determine the narrow size.
13921 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13922
13923 MVT EltVT = SupportsExt == ExtKind::FPExt
13924 ? MVT::getFloatingPointVT(NarrowSize)
13925 : MVT::getIntegerVT(NarrowSize);
13926
13927 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13928 "Trying to extend something we can't represent");
13929 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13930 return NarrowVT;
13931 }
13932
13933 /// Get the opcode to materialize:
13934 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13935 static unsigned getSExtOpcode(unsigned Opcode) {
13936 switch (Opcode) {
13937 case ISD::ADD:
13938 case RISCVISD::ADD_VL:
 13939 case RISCVISD::VWADD_W_VL:
 13940 case RISCVISD::VWADDU_W_VL:
 13941 case ISD::OR:
13942 return RISCVISD::VWADD_VL;
13943 case ISD::SUB:
13944 case RISCVISD::SUB_VL:
 13945 case RISCVISD::VWSUB_W_VL:
 13946 case RISCVISD::VWSUBU_W_VL:
 13947 return RISCVISD::VWSUB_VL;
13948 case ISD::MUL:
13949 case RISCVISD::MUL_VL:
13950 return RISCVISD::VWMUL_VL;
13951 default:
13952 llvm_unreachable("Unexpected opcode");
13953 }
13954 }
13955
13956 /// Get the opcode to materialize:
13957 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13958 static unsigned getZExtOpcode(unsigned Opcode) {
13959 switch (Opcode) {
13960 case ISD::ADD:
13961 case RISCVISD::ADD_VL:
 13962 case RISCVISD::VWADD_W_VL:
 13963 case RISCVISD::VWADDU_W_VL:
 13964 case ISD::OR:
13965 return RISCVISD::VWADDU_VL;
13966 case ISD::SUB:
13967 case RISCVISD::SUB_VL:
 13968 case RISCVISD::VWSUB_W_VL:
 13969 case RISCVISD::VWSUBU_W_VL:
 13970 return RISCVISD::VWSUBU_VL;
13971 case ISD::MUL:
13972 case RISCVISD::MUL_VL:
13973 return RISCVISD::VWMULU_VL;
13974 case ISD::SHL:
13975 case RISCVISD::SHL_VL:
13976 return RISCVISD::VWSLL_VL;
13977 default:
13978 llvm_unreachable("Unexpected opcode");
13979 }
13980 }
13981
13982 /// Get the opcode to materialize:
13983 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13984 static unsigned getFPExtOpcode(unsigned Opcode) {
13985 switch (Opcode) {
13986 case RISCVISD::FADD_VL:
 13987 case RISCVISD::VFWADD_W_VL:
 13988 return RISCVISD::VFWADD_VL;
13989 case RISCVISD::FSUB_VL:
 13990 case RISCVISD::VFWSUB_W_VL:
 13991 return RISCVISD::VFWSUB_VL;
13992 case RISCVISD::FMUL_VL:
13993 return RISCVISD::VFWMUL_VL;
13994 default:
13995 llvm_unreachable("Unexpected opcode");
13996 }
13997 }
13998
13999 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14000 /// newOpcode(a, b).
14001 static unsigned getSUOpcode(unsigned Opcode) {
14002 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14003 "SU is only supported for MUL");
14004 return RISCVISD::VWMULSU_VL;
14005 }
14006
14007 /// Get the opcode to materialize
14008 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14009 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14010 switch (Opcode) {
14011 case ISD::ADD:
14012 case RISCVISD::ADD_VL:
14013 case ISD::OR:
14014 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
 14015 : RISCVISD::VWADDU_W_VL;
 14016 case ISD::SUB:
14017 case RISCVISD::SUB_VL:
14018 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
 14019 : RISCVISD::VWSUBU_W_VL;
 14020 case RISCVISD::FADD_VL:
14021 return RISCVISD::VFWADD_W_VL;
14022 case RISCVISD::FSUB_VL:
14023 return RISCVISD::VFWSUB_W_VL;
14024 default:
14025 llvm_unreachable("Unexpected opcode");
14026 }
14027 }
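// Taken together, the helpers above map e.g. (add (sext a), (sext b)) to
// VWADD_VL (vwadd.vv) and (add w, (zext b)), where only one operand is
// narrow, to VWADDU_W_VL (vwaddu.wv); which mapping applies depends on the
// extensions that fillUpExtensionSupport below reports for each operand.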
14028
14029 using CombineToTry = std::function<std::optional<CombineResult>(
14030 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14031 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14032 const RISCVSubtarget &)>;
14033
14034 /// Check if this node needs to be fully folded or extended for all users.
14035 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14036
14037 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14038 const RISCVSubtarget &Subtarget) {
14039 unsigned Opc = OrigOperand.getOpcode();
14040 MVT VT = OrigOperand.getSimpleValueType();
14041
14042 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14043 "Unexpected Opcode");
14044
14045 // The passthru must be undef for tail agnostic.
14046 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14047 return;
14048
14049 // Get the scalar value.
14050 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14051 : OrigOperand.getOperand(1);
14052
14053 // See if we have enough sign bits or zero bits in the scalar to use a
14054 // widening opcode by splatting to smaller element size.
14055 unsigned EltBits = VT.getScalarSizeInBits();
14056 unsigned ScalarBits = Op.getValueSizeInBits();
14057 // Make sure we're getting all element bits from the scalar register.
14058 // FIXME: Support implicit sign extension of vmv.v.x?
14059 if (ScalarBits < EltBits)
14060 return;
14061
14062 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14063 // If the narrow type cannot be expressed with a legal VMV,
14064 // this is not a valid candidate.
14065 if (NarrowSize < 8)
14066 return;
14067
14068 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14069 SupportsSExt = true;
14070
14071 if (DAG.MaskedValueIsZero(Op,
14072 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14073 SupportsZExt = true;
14074
14075 EnforceOneUse = false;
14076 }
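// As an illustration: a VMV_V_X_VL splat of the constant 7 into nxv4i32
// comfortably fits in the narrow element type i16, so both SupportsSExt and
// SupportsZExt are set here and the users of the splat may be rewritten as
// widening ops that splat at the narrow type instead.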
14077
14078 /// Helper method to set the various fields of this struct based on the
14079 /// type of \p Root.
14080 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14081 const RISCVSubtarget &Subtarget) {
14082 SupportsZExt = false;
14083 SupportsSExt = false;
14084 SupportsFPExt = false;
14085 EnforceOneUse = true;
14086 unsigned Opc = OrigOperand.getOpcode();
14087 // For the nodes we handle below, we end up using their inputs directly: see
14088 // getSource(). However since they either don't have a passthru or we check
14089 // that their passthru is undef, we can safely ignore their mask and VL.
14090 switch (Opc) {
14091 case ISD::ZERO_EXTEND:
14092 case ISD::SIGN_EXTEND: {
14093 MVT VT = OrigOperand.getSimpleValueType();
14094 if (!VT.isVector())
14095 break;
14096
14097 SDValue NarrowElt = OrigOperand.getOperand(0);
14098 MVT NarrowVT = NarrowElt.getSimpleValueType();
14099 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14100 if (NarrowVT.getVectorElementType() == MVT::i1)
14101 break;
14102
14103 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14104 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14105 break;
14106 }
14107 case RISCVISD::VZEXT_VL:
14108 SupportsZExt = true;
14109 break;
14110 case RISCVISD::VSEXT_VL:
14111 SupportsSExt = true;
14112 break;
14113 case RISCVISD::FP_EXTEND_VL:
14114 SupportsFPExt = true;
14115 break;
14116 case ISD::SPLAT_VECTOR:
14117 case RISCVISD::VMV_V_X_VL:
14118 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14119 break;
14120 default:
14121 break;
14122 }
14123 }
14124
14125 /// Check if \p Root supports any extension folding combines.
14126 static bool isSupportedRoot(const SDNode *Root,
14127 const RISCVSubtarget &Subtarget) {
14128 switch (Root->getOpcode()) {
14129 case ISD::ADD:
14130 case ISD::SUB:
14131 case ISD::MUL: {
14132 return Root->getValueType(0).isScalableVector();
14133 }
14134 case ISD::OR: {
14135 return Root->getValueType(0).isScalableVector() &&
14136 Root->getFlags().hasDisjoint();
14137 }
14138 // Vector Widening Integer Add/Sub/Mul Instructions
14139 case RISCVISD::ADD_VL:
14140 case RISCVISD::MUL_VL:
14141 case RISCVISD::VWADD_W_VL:
14142 case RISCVISD::VWADDU_W_VL:
14143 case RISCVISD::SUB_VL:
14144 case RISCVISD::VWSUB_W_VL:
14145 case RISCVISD::VWSUBU_W_VL:
14146 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14147 case RISCVISD::FADD_VL:
14148 case RISCVISD::FSUB_VL:
14149 case RISCVISD::FMUL_VL:
14150 case RISCVISD::VFWADD_W_VL:
14151 case RISCVISD::VFWSUB_W_VL:
14152 return true;
14153 case ISD::SHL:
14154 return Root->getValueType(0).isScalableVector() &&
14155 Subtarget.hasStdExtZvbb();
14156 case RISCVISD::SHL_VL:
14157 return Subtarget.hasStdExtZvbb();
14158 default:
14159 return false;
14160 }
14161 }
14162
14163 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14164 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14165 const RISCVSubtarget &Subtarget) {
14166 assert(isSupportedRoot(Root, Subtarget) &&
14167 "Trying to build a helper with an "
14168 "unsupported root");
14169 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14171 OrigOperand = Root->getOperand(OperandIdx);
14172
14173 unsigned Opc = Root->getOpcode();
14174 switch (Opc) {
14175 // We consider
14176 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14177 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14178 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14179 case RISCVISD::VWADD_W_VL:
14180 case RISCVISD::VWADDU_W_VL:
14181 case RISCVISD::VWSUB_W_VL:
14182 case RISCVISD::VWSUBU_W_VL:
14183 case RISCVISD::VFWADD_W_VL:
14184 case RISCVISD::VFWSUB_W_VL:
14185 if (OperandIdx == 1) {
14186 SupportsZExt =
14187 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14188 SupportsSExt =
14189 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14190 SupportsFPExt =
14191 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14192 // There's no existing extension here, so we don't have to worry about
14193 // making sure it gets removed.
14194 EnforceOneUse = false;
14195 break;
14196 }
14197 [[fallthrough]];
14198 default:
14199 fillUpExtensionSupport(Root, DAG, Subtarget);
14200 break;
14201 }
14202 }
14203
14204 /// Helper function to get the Mask and VL from \p Root.
14205 static std::pair<SDValue, SDValue>
14206 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14207 const RISCVSubtarget &Subtarget) {
14208 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14209 switch (Root->getOpcode()) {
14210 case ISD::ADD:
14211 case ISD::SUB:
14212 case ISD::MUL:
14213 case ISD::OR:
14214 case ISD::SHL: {
14215 SDLoc DL(Root);
14216 MVT VT = Root->getSimpleValueType(0);
14217 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14218 }
14219 default:
14220 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14221 }
14222 }
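// For the generic ISD roots this returns an all-ones mask and VLMAX, i.e.
// the unpredicated full-length form; the RISCVISD::*_VL roots already carry
// their mask and VL as operands 3 and 4 and those are reused directly.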
14223
14224 /// Helper function to check if \p N is commutative with respect to the
14225 /// foldings that are supported by this class.
14226 static bool isCommutative(const SDNode *N) {
14227 switch (N->getOpcode()) {
14228 case ISD::ADD:
14229 case ISD::MUL:
14230 case ISD::OR:
14231 case RISCVISD::ADD_VL:
14232 case RISCVISD::MUL_VL:
14233 case RISCVISD::VWADD_W_VL:
14234 case RISCVISD::VWADDU_W_VL:
14235 case RISCVISD::FADD_VL:
14236 case RISCVISD::FMUL_VL:
14237 case RISCVISD::VFWADD_W_VL:
14238 return true;
14239 case ISD::SUB:
14240 case RISCVISD::SUB_VL:
14241 case RISCVISD::VWSUB_W_VL:
14242 case RISCVISD::VWSUBU_W_VL:
14243 case RISCVISD::FSUB_VL:
14244 case RISCVISD::VFWSUB_W_VL:
14245 case ISD::SHL:
14246 case RISCVISD::SHL_VL:
14247 return false;
14248 default:
14249 llvm_unreachable("Unexpected opcode");
14250 }
14251 }
14252
14253 /// Get a list of combine to try for folding extensions in \p Root.
14254 /// Note that each returned CombineToTry function doesn't actually modify
14255 /// anything. Instead they produce an optional CombineResult that, if not
14256 /// std::nullopt, needs to be materialized for the combine to be applied.
14257 /// \see CombineResult::materialize.
14258 /// If the related CombineToTry function returns std::nullopt, that means the
14259 /// combine didn't match.
14260 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14261};
14262
14263/// Helper structure that holds all the necessary information to materialize a
14264/// combine that does some extension folding.
14265struct CombineResult {
14266 /// Opcode to be generated when materializing the combine.
14267 unsigned TargetOpcode;
14268 // No value means no extension is needed.
14269 std::optional<ExtKind> LHSExt;
14270 std::optional<ExtKind> RHSExt;
14271 /// Root of the combine.
14272 SDNode *Root;
14273 /// LHS of the TargetOpcode.
14274 NodeExtensionHelper LHS;
14275 /// RHS of the TargetOpcode.
14276 NodeExtensionHelper RHS;
14277
14278 CombineResult(unsigned TargetOpcode, SDNode *Root,
14279 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14280 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14281 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14282 LHS(LHS), RHS(RHS) {}
14283
14284 /// Return a value that uses TargetOpcode and that can be used to replace
14285 /// Root.
14286 /// The actual replacement is *not* done in that method.
14287 SDValue materialize(SelectionDAG &DAG,
14288 const RISCVSubtarget &Subtarget) const {
14289 SDValue Mask, VL, Merge;
14290 std::tie(Mask, VL) =
14291 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14292 switch (Root->getOpcode()) {
14293 default:
14294 Merge = Root->getOperand(2);
14295 break;
14296 case ISD::ADD:
14297 case ISD::SUB:
14298 case ISD::MUL:
14299 case ISD::OR:
14300 case ISD::SHL:
14301 Merge = DAG.getUNDEF(Root->getValueType(0));
14302 break;
14303 }
14304 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14305 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14306 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14307 Merge, Mask, VL);
14308 }
14309};
14310
14311/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14312/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14313/// are zext) and LHS and RHS can be folded into Root.
14314 /// AllowExtMask defines which form `ext` can take in this pattern.
14315///
14316/// \note If the pattern can match with both zext and sext, the returned
14317/// CombineResult will feature the zext result.
14318///
14319/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14320/// can be used to apply the pattern.
14321static std::optional<CombineResult>
14322canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14323 const NodeExtensionHelper &RHS,
14324 uint8_t AllowExtMask, SelectionDAG &DAG,
14325 const RISCVSubtarget &Subtarget) {
14326 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14327 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14328 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14329 /*RHSExt=*/{ExtKind::ZExt});
14330 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14331 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14332 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14333 /*RHSExt=*/{ExtKind::SExt});
14334 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14335 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14336 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14337 /*RHSExt=*/{ExtKind::FPExt});
14338 return std::nullopt;
14339}
14340
14341/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14342/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14343/// are zext) and LHS and RHS can be folded into Root.
14344///
14345/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14346/// can be used to apply the pattern.
14347static std::optional<CombineResult>
14348canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14349 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14350 const RISCVSubtarget &Subtarget) {
14351 return canFoldToVWWithSameExtensionImpl(
14352 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14353 Subtarget);
14354}
14355
14356/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14357///
14358/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14359/// can be used to apply the pattern.
14360static std::optional<CombineResult>
14361canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14362 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14363 const RISCVSubtarget &Subtarget) {
14364 if (RHS.SupportsFPExt)
14365 return CombineResult(
14366 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14367 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14368
14369 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14370 // sext/zext?
14371 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14372 // purposes.
14373 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14374 return CombineResult(
14375 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14376 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14377 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14378 return CombineResult(
14379 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14380 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14381 return std::nullopt;
14382}
14383
14384/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14385///
14386/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14387/// can be used to apply the pattern.
14388static std::optional<CombineResult>
14389canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14390 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14391 const RISCVSubtarget &Subtarget) {
14392 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14393 Subtarget);
14394}
14395
14396/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14397///
14398/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14399/// can be used to apply the pattern.
14400static std::optional<CombineResult>
14401canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14402 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14403 const RISCVSubtarget &Subtarget) {
14404 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14405 Subtarget);
14406}
14407
14408/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14409///
14410/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14411/// can be used to apply the pattern.
14412static std::optional<CombineResult>
14413canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14414 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14415 const RISCVSubtarget &Subtarget) {
14416 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14417 Subtarget);
14418}
14419
14420/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14421///
14422/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14423/// can be used to apply the pattern.
14424static std::optional<CombineResult>
14425canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14426 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14427 const RISCVSubtarget &Subtarget) {
14428
14429 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14430 return std::nullopt;
14431 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14432 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14433 /*RHSExt=*/{ExtKind::ZExt});
14434}
14435
14436 SmallVector<NodeExtensionHelper::CombineToTry>
14437 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14438 SmallVector<CombineToTry> Strategies;
14439 switch (Root->getOpcode()) {
14440 case ISD::ADD:
14441 case ISD::SUB:
14442 case ISD::OR:
14443 case RISCVISD::ADD_VL:
14444 case RISCVISD::SUB_VL:
14445 case RISCVISD::FADD_VL:
14446 case RISCVISD::FSUB_VL:
14447 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14448 Strategies.push_back(canFoldToVWWithSameExtension);
14449 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
14450 Strategies.push_back(canFoldToVW_W);
14451 break;
14452 case RISCVISD::FMUL_VL:
14453 Strategies.push_back(canFoldToVWWithSameExtension);
14454 break;
14455 case ISD::MUL:
14456 case RISCVISD::MUL_VL:
14457 // mul -> vwmul(u)
14458 Strategies.push_back(canFoldToVWWithSameExtension);
14459 // mul -> vwmulsu
14460 Strategies.push_back(canFoldToVW_SU);
14461 break;
14462 case ISD::SHL:
14463 case RISCVISD::SHL_VL:
14464 // shl -> vwsll
14465 Strategies.push_back(canFoldToVWWithZEXT);
14466 break;
14467 case RISCVISD::VWADD_W_VL:
14468 case RISCVISD::VWSUB_W_VL:
14469 // vwadd_w|vwsub_w -> vwadd|vwsub
14470 Strategies.push_back(canFoldToVWWithSEXT);
14471 break;
14472 case RISCVISD::VWADDU_W_VL:
14473 case RISCVISD::VWSUBU_W_VL:
14474 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14475 Strategies.push_back(canFoldToVWWithZEXT);
14476 break;
14477 case RISCVISD::VFWADD_W_VL:
14478 case RISCVISD::VFWSUB_W_VL:
14479 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14480 Strategies.push_back(canFoldToVWWithFPEXT);
14481 break;
14482 default:
14483 llvm_unreachable("Unexpected opcode");
14484 }
14485 return Strategies;
14486}
14487} // End anonymous namespace.
14488
14489/// Combine a binary operation to its equivalent VW or VW_W form.
14490/// The supported combines are:
14491/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14492/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14493/// mul | mul_vl -> vwmul(u) | vwmul_su
14494/// shl | shl_vl -> vwsll
14495/// fadd_vl -> vfwadd | vfwadd_w
14496/// fsub_vl -> vfwsub | vfwsub_w
14497/// fmul_vl -> vfwmul
14498/// vwadd_w(u) -> vwadd(u)
14499/// vwsub_w(u) -> vwsub(u)
14500/// vfwadd_w -> vfwadd
14501/// vfwsub_w -> vfwsub
14502 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14503 TargetLowering::DAGCombinerInfo &DCI,
14504 const RISCVSubtarget &Subtarget) {
14505 SelectionDAG &DAG = DCI.DAG;
14506 if (DCI.isBeforeLegalize())
14507 return SDValue();
14508
14509 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14510 return SDValue();
14511
14512 SmallVector<SDNode *> Worklist;
14513 SmallSet<SDNode *, 8> Inserted;
14514 Worklist.push_back(N);
14515 Inserted.insert(N);
14516 SmallVector<CombineResult> CombinesToApply;
14517
14518 while (!Worklist.empty()) {
14519 SDNode *Root = Worklist.pop_back_val();
14520 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14521 return SDValue();
14522
14523 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14524 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14525 auto AppendUsersIfNeeded = [&Worklist,
14526 &Inserted](const NodeExtensionHelper &Op) {
14527 if (Op.needToPromoteOtherUsers()) {
14528 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14529 if (Inserted.insert(TheUse).second)
14530 Worklist.push_back(TheUse);
14531 }
14532 }
14533 };
14534
14535 // Control the compile time by limiting the number of nodes we look at in
14536 // total.
14537 if (Inserted.size() > ExtensionMaxWebSize)
14538 return SDValue();
14539
14540 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14541 NodeExtensionHelper::getSupportedFoldings(N);
14542
14543 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14544 bool Matched = false;
14545 for (int Attempt = 0;
14546 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14547 ++Attempt) {
14548
14549 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14550 FoldingStrategies) {
14551 std::optional<CombineResult> Res =
14552 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14553 if (Res) {
14554 Matched = true;
14555 CombinesToApply.push_back(*Res);
14556 // All the inputs that are extended need to be folded, otherwise
14557 // we would be leaving the old input (since it may still be used),
14558 // and the new one.
14559 if (Res->LHSExt.has_value())
14560 AppendUsersIfNeeded(LHS);
14561 if (Res->RHSExt.has_value())
14562 AppendUsersIfNeeded(RHS);
14563 break;
14564 }
14565 }
14566 std::swap(LHS, RHS);
14567 }
14568 // Right now we do an all or nothing approach.
14569 if (!Matched)
14570 return SDValue();
14571 }
14572 // Store the value for the replacement of the input node separately.
14573 SDValue InputRootReplacement;
14574 // We do the RAUW after we materialize all the combines, because some replaced
14575 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14576 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14577 // yet-to-be-visited CombinesToApply roots.
14578 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14579 ValuesToReplace.reserve(CombinesToApply.size());
14580 for (CombineResult Res : CombinesToApply) {
14581 SDValue NewValue = Res.materialize(DAG, Subtarget);
14582 if (!InputRootReplacement) {
14583 assert(Res.Root == N &&
14584 "First element is expected to be the current node");
14585 InputRootReplacement = NewValue;
14586 } else {
14587 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14588 }
14589 }
14590 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14591 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14592 DCI.AddToWorklist(OldNewValues.second.getNode());
14593 }
14594 return InputRootReplacement;
14595}
14596
14597// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14598// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14599// y will be the Passthru and cond will be the Mask.
14600 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14601 unsigned Opc = N->getOpcode();
14602 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14603 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14604
14605 SDValue Y = N->getOperand(0);
14606 SDValue MergeOp = N->getOperand(1);
14607 unsigned MergeOpc = MergeOp.getOpcode();
14608
14609 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14610 return SDValue();
14611
14612 SDValue X = MergeOp->getOperand(1);
14613
14614 if (!MergeOp.hasOneUse())
14615 return SDValue();
14616
14617 // Passthru should be undef
14618 SDValue Passthru = N->getOperand(2);
14619 if (!Passthru.isUndef())
14620 return SDValue();
14621
14622 // Mask should be all ones
14623 SDValue Mask = N->getOperand(3);
14624 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14625 return SDValue();
14626
14627 // False value of MergeOp should be all zeros
14628 SDValue Z = MergeOp->getOperand(2);
14629
14630 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14631 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14632 Z = Z.getOperand(1);
14633
14634 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14635 return SDValue();
14636
14637 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14638 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14639 N->getFlags());
14640}
14641
14642 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14643 TargetLowering::DAGCombinerInfo &DCI,
14644 const RISCVSubtarget &Subtarget) {
14645 [[maybe_unused]] unsigned Opc = N->getOpcode();
14646 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14647 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14648
14649 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14650 return V;
14651
14652 return combineVWADDSUBWSelect(N, DCI.DAG);
14653}
14654
14655// Helper function for performMemPairCombine.
14656// Try to combine the memory loads/stores LSNode1 and LSNode2
14657// into a single memory pair operation.
14658 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14659 LSBaseSDNode *LSNode2, SDValue BasePtr,
14660 uint64_t Imm) {
14661 SmallPtrSet<const SDNode *, 32> Visited;
14662 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14663
14664 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14665 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14666 return SDValue();
14667
14668 MachineFunction &MF = DAG.getMachineFunction();
14669 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14670
14671 // The new operation has twice the width.
14672 MVT XLenVT = Subtarget.getXLenVT();
14673 EVT MemVT = LSNode1->getMemoryVT();
14674 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14675 MachineMemOperand *MMO = LSNode1->getMemOperand();
14676 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14677 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14678
14679 if (LSNode1->getOpcode() == ISD::LOAD) {
14680 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14681 unsigned Opcode;
14682 if (MemVT == MVT::i32)
14683 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14684 else
14685 Opcode = RISCVISD::TH_LDD;
14686
14687 SDValue Res = DAG.getMemIntrinsicNode(
14688 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14689 {LSNode1->getChain(), BasePtr,
14690 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14691 NewMemVT, NewMMO);
14692
14693 SDValue Node1 =
14694 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14695 SDValue Node2 =
14696 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14697
14698 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14699 return Node1;
14700 } else {
14701 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14702
14703 SDValue Res = DAG.getMemIntrinsicNode(
14704 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14705 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14706 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14707 NewMemVT, NewMMO);
14708
14709 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14710 return Res;
14711 }
14712}
14713
14714// Try to combine two adjacent loads/stores to a single pair instruction from
14715// the XTHeadMemPair vendor extension.
14716 static SDValue performMemPairCombine(SDNode *N,
14717 TargetLowering::DAGCombinerInfo &DCI) {
14718 SelectionDAG &DAG = DCI.DAG;
14719 MachineFunction &MF = DAG.getMachineFunction();
14720 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14721
14722 // Target does not support load/store pair.
14723 if (!Subtarget.hasVendorXTHeadMemPair())
14724 return SDValue();
14725
14726 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14727 EVT MemVT = LSNode1->getMemoryVT();
14728 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14729
14730 // No volatile, indexed or atomic loads/stores.
14731 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14732 return SDValue();
14733
14734 // Function to get a base + constant representation from a memory value.
14735 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14736 if (Ptr->getOpcode() == ISD::ADD)
14737 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14738 return {Ptr->getOperand(0), C1->getZExtValue()};
14739 return {Ptr, 0};
14740 };
14741
14742 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14743
14744 SDValue Chain = N->getOperand(0);
14745 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14746 UI != UE; ++UI) {
14747 SDUse &Use = UI.getUse();
14748 if (Use.getUser() != N && Use.getResNo() == 0 &&
14749 Use.getUser()->getOpcode() == N->getOpcode()) {
14750 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14751
14752 // No volatile, indexed or atomic loads/stores.
14753 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14754 continue;
14755
14756 // Check if LSNode1 and LSNode2 have the same type and extension.
14757 if (LSNode1->getOpcode() == ISD::LOAD)
14758 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14759 cast<LoadSDNode>(LSNode1)->getExtensionType())
14760 continue;
14761
14762 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14763 continue;
14764
14765 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14766
14767 // Check if the base pointer is the same for both instructions.
14768 if (Base1 != Base2)
14769 continue;
14770
14771 // Check if the offsets match the XTHeadMemPair encoding constraints.
14772 bool Valid = false;
14773 if (MemVT == MVT::i32) {
14774 // Check for adjacent i32 values and a 2-bit index.
14775 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14776 Valid = true;
14777 } else if (MemVT == MVT::i64) {
14778 // Check for adjacent i64 values and a 2-bit index.
14779 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14780 Valid = true;
14781 }
14782
14783 if (!Valid)
14784 continue;
14785
14786 // Try to combine.
14787 if (SDValue Res =
14788 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14789 return Res;
14790 }
14791 }
14792
14793 return SDValue();
14794}
14795
14796// Fold
14797// (fp_to_int (froundeven X)) -> fcvt X, rne
14798// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14799// (fp_to_int (ffloor X)) -> fcvt X, rdn
14800// (fp_to_int (fceil X)) -> fcvt X, rup
14801// (fp_to_int (fround X)) -> fcvt X, rmm
14802// (fp_to_int (frint X)) -> fcvt X
14803 static SDValue performFP_TO_INTCombine(SDNode *N,
14804 TargetLowering::DAGCombinerInfo &DCI,
14805 const RISCVSubtarget &Subtarget) {
14806 SelectionDAG &DAG = DCI.DAG;
14807 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14808 MVT XLenVT = Subtarget.getXLenVT();
14809
14810 SDValue Src = N->getOperand(0);
14811
14812 // Don't do this for strict-fp Src.
14813 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14814 return SDValue();
14815
14816 // Ensure the FP type is legal.
14817 if (!TLI.isTypeLegal(Src.getValueType()))
14818 return SDValue();
14819
14820 // Don't do this for f16 with Zfhmin and not Zfh.
14821 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14822 return SDValue();
14823
14824 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14825 // If the result is invalid, we didn't find a foldable instruction.
14826 if (FRM == RISCVFPRndMode::Invalid)
14827 return SDValue();
14828
14829 SDLoc DL(N);
14830 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14831 EVT VT = N->getValueType(0);
14832
14833 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14834 MVT SrcVT = Src.getSimpleValueType();
14835 MVT SrcContainerVT = SrcVT;
14836 MVT ContainerVT = VT.getSimpleVT();
14837 SDValue XVal = Src.getOperand(0);
14838
14839 // For widening and narrowing conversions we just combine it into a
14840 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14841 // end up getting lowered to their appropriate pseudo instructions based on
14842 // their operand types
14843 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14844 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14845 return SDValue();
14846
14847 // Make fixed-length vectors scalable first
14848 if (SrcVT.isFixedLengthVector()) {
14849 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14850 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14851 ContainerVT =
14852 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14853 }
14854
14855 auto [Mask, VL] =
14856 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14857
14858 SDValue FpToInt;
14859 if (FRM == RISCVFPRndMode::RTZ) {
14860 // Use the dedicated trunc static rounding mode if we're truncating so we
14861 // don't need to generate calls to fsrmi/fsrm
14862 unsigned Opc =
14863 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14864 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14865 } else if (FRM == RISCVFPRndMode::DYN) {
14866 unsigned Opc =
14867 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14868 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14869 } else {
14870 unsigned Opc =
14871 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14872 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14873 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14874 }
14875
14876 // If converted from fixed-length to scalable, convert back
14877 if (VT.isFixedLengthVector())
14878 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14879
14880 return FpToInt;
14881 }
14882
14883 // Only handle XLen or i32 types. Other types narrower than XLen will
14884 // eventually be legalized to XLenVT.
14885 if (VT != MVT::i32 && VT != XLenVT)
14886 return SDValue();
14887
14888 unsigned Opc;
14889 if (VT == XLenVT)
14890 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14891 else
14892 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14893
14894 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14895 DAG.getTargetConstant(FRM, DL, XLenVT));
14896 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14897}
14898
14899// Fold
14900// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14901// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14902// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14903// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14904// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14905// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14906 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14907 TargetLowering::DAGCombinerInfo &DCI,
14908 const RISCVSubtarget &Subtarget) {
14909 SelectionDAG &DAG = DCI.DAG;
14910 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14911 MVT XLenVT = Subtarget.getXLenVT();
14912
14913 // Only handle XLen types. Other types narrower than XLen will eventually be
14914 // legalized to XLenVT.
14915 EVT DstVT = N->getValueType(0);
14916 if (DstVT != XLenVT)
14917 return SDValue();
14918
14919 SDValue Src = N->getOperand(0);
14920
14921 // Don't do this for strict-fp Src.
14922 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14923 return SDValue();
14924
14925 // Ensure the FP type is also legal.
14926 if (!TLI.isTypeLegal(Src.getValueType()))
14927 return SDValue();
14928
14929 // Don't do this for f16 with Zfhmin and not Zfh.
14930 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14931 return SDValue();
14932
14933 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14934
14935 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14936 if (FRM == RISCVFPRndMode::Invalid)
14937 return SDValue();
14938
14939 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14940
14941 unsigned Opc;
14942 if (SatVT == DstVT)
14943 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14944 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14945 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14946 else
14947 return SDValue();
14948 // FIXME: Support other SatVTs by clamping before or after the conversion.
14949
14950 Src = Src.getOperand(0);
14951
14952 SDLoc DL(N);
14953 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14954 DAG.getTargetConstant(FRM, DL, XLenVT));
14955
14956 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14957 // extend.
14958 if (Opc == RISCVISD::FCVT_WU_RV64)
14959 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14960
14961 // RISC-V FP-to-int conversions saturate to the destination register size, but
14962 // don't produce 0 for nan.
14963 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14964 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14965}
14966
14967// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14968// smaller than XLenVT.
14969 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14970 const RISCVSubtarget &Subtarget) {
14971 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14972
14973 SDValue Src = N->getOperand(0);
14974 if (Src.getOpcode() != ISD::BSWAP)
14975 return SDValue();
14976
14977 EVT VT = N->getValueType(0);
14978 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14979 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14980 return SDValue();
14981
14982 SDLoc DL(N);
14983 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14984}
14985
14986// Convert from one FMA opcode to another based on whether we are negating the
14987// multiply result and/or the accumulator.
14988// NOTE: Only supports RVV operations with VL.
14989static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14990 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14991 if (NegMul) {
14992 // clang-format off
14993 switch (Opcode) {
14994 default: llvm_unreachable("Unexpected opcode");
14995 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14996 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14997 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14998 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14999 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15000 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15001 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15002 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15003 }
15004 // clang-format on
15005 }
15006
15007 // Negating the accumulator changes ADD<->SUB.
15008 if (NegAcc) {
15009 // clang-format off
15010 switch (Opcode) {
15011 default: llvm_unreachable("Unexpected opcode");
15012 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15013 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15014 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15015 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15016 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15017 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15018 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15019 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15020 }
15021 // clang-format on
15022 }
15023
15024 return Opcode;
15025}
15026
15027 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15028 // Fold FNEG_VL into FMA opcodes.
15029 // The first operand of strict-fp is chain.
15030 unsigned Offset = N->isTargetStrictFPOpcode();
15031 SDValue A = N->getOperand(0 + Offset);
15032 SDValue B = N->getOperand(1 + Offset);
15033 SDValue C = N->getOperand(2 + Offset);
15034 SDValue Mask = N->getOperand(3 + Offset);
15035 SDValue VL = N->getOperand(4 + Offset);
15036
15037 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15038 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15039 V.getOperand(2) == VL) {
15040 // Return the negated input.
15041 V = V.getOperand(0);
15042 return true;
15043 }
15044
15045 return false;
15046 };
15047
15048 bool NegA = invertIfNegative(A);
15049 bool NegB = invertIfNegative(B);
15050 bool NegC = invertIfNegative(C);
15051
15052 // If no operands are negated, we're done.
15053 if (!NegA && !NegB && !NegC)
15054 return SDValue();
15055
15056 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15057 if (N->isTargetStrictFPOpcode())
15058 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15059 {N->getOperand(0), A, B, C, Mask, VL});
15060 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15061 VL);
15062}
15063
15064 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15065 const RISCVSubtarget &Subtarget) {
15066 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15067 return V;
15068
15069 if (N->getValueType(0).isScalableVector() &&
15070 N->getValueType(0).getVectorElementType() == MVT::f32 &&
15071 (Subtarget.hasVInstructionsF16Minimal() &&
15072 !Subtarget.hasVInstructionsF16())) {
15073 return SDValue();
15074 }
15075
15076 // FIXME: Ignore strict opcodes for now.
15077 if (N->isTargetStrictFPOpcode())
15078 return SDValue();
15079
15080 // Try to form widening FMA.
15081 SDValue Op0 = N->getOperand(0);
15082 SDValue Op1 = N->getOperand(1);
15083 SDValue Mask = N->getOperand(3);
15084 SDValue VL = N->getOperand(4);
15085
15086 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15087 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15088 return SDValue();
15089
15090 // TODO: Refactor to handle more complex cases similar to
15091 // combineBinOp_VLToVWBinOp_VL.
15092 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15093 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15094 return SDValue();
15095
15096 // Check the mask and VL are the same.
15097 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15098 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15099 return SDValue();
15100
15101 unsigned NewOpc;
15102 switch (N->getOpcode()) {
15103 default:
15104 llvm_unreachable("Unexpected opcode");
15105 case RISCVISD::VFMADD_VL:
15106 NewOpc = RISCVISD::VFWMADD_VL;
15107 break;
15108 case RISCVISD::VFNMSUB_VL:
15109 NewOpc = RISCVISD::VFWNMSUB_VL;
15110 break;
15111 case RISCVISD::VFNMADD_VL:
15112 NewOpc = RISCVISD::VFWNMADD_VL;
15113 break;
15114 case RISCVISD::VFMSUB_VL:
15115 NewOpc = RISCVISD::VFWMSUB_VL;
15116 break;
15117 }
15118
15119 Op0 = Op0.getOperand(0);
15120 Op1 = Op1.getOperand(0);
15121
15122 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15123 N->getOperand(2), Mask, VL);
15124}
15125
15126 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15127 const RISCVSubtarget &Subtarget) {
15128 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15129
15130 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15131 return SDValue();
15132
15133 if (!isa<ConstantSDNode>(N->getOperand(1)))
15134 return SDValue();
15135 uint64_t ShAmt = N->getConstantOperandVal(1);
15136 if (ShAmt > 32)
15137 return SDValue();
15138
15139 SDValue N0 = N->getOperand(0);
15140
15141 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15142 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15143 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15144 if (ShAmt < 32 &&
15145 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15146 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15147 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15148 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15149 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15150 if (LShAmt < 32) {
15151 SDLoc ShlDL(N0.getOperand(0));
15152 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15153 N0.getOperand(0).getOperand(0),
15154 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15155 SDLoc DL(N);
15156 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15157 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15158 }
15159 }
15160
15161 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15162 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15163 //
15164 // Also try these folds where an add or sub is in the middle.
15165 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15166 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15167 SDValue Shl;
15168 ConstantSDNode *AddC = nullptr;
15169
15170 // We might have an ADD or SUB between the SRA and SHL.
15171 bool IsAdd = N0.getOpcode() == ISD::ADD;
15172 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15173 // Other operand needs to be a constant we can modify.
15174 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15175 if (!AddC)
15176 return SDValue();
15177
15178 // AddC needs to have at least 32 trailing zeros.
15179 if (AddC->getAPIntValue().countr_zero() < 32)
15180 return SDValue();
15181
15182 // All users should be a shift by constant less than or equal to 32. This
15183 // ensures we'll do this optimization for each of them to produce an
15184 // add/sub+sext_inreg they can all share.
15185 for (SDNode *U : N0->uses()) {
15186 if (U->getOpcode() != ISD::SRA ||
15187 !isa<ConstantSDNode>(U->getOperand(1)) ||
15188 U->getConstantOperandVal(1) > 32)
15189 return SDValue();
15190 }
15191
15192 Shl = N0.getOperand(IsAdd ? 0 : 1);
15193 } else {
15194 // Not an ADD or SUB.
15195 Shl = N0;
15196 }
15197
15198 // Look for a shift left by 32.
15199 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15200 Shl.getConstantOperandVal(1) != 32)
15201 return SDValue();
15202
15203 // If we didn't look through an add/sub, then the shl should have one use.
15204 // If we did look through an add/sub, the sext_inreg we create is free so
15205 // we're only creating 2 new instructions. It's enough to only remove the
15206 // original sra+add/sub.
15207 if (!AddC && !Shl.hasOneUse())
15208 return SDValue();
15209
15210 SDLoc DL(N);
15211 SDValue In = Shl.getOperand(0);
15212
15213 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15214 // constant.
15215 if (AddC) {
15216 SDValue ShiftedAddC =
15217 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15218 if (IsAdd)
15219 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15220 else
15221 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15222 }
15223
15224 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15225 DAG.getValueType(MVT::i32));
15226 if (ShAmt == 32)
15227 return SExt;
15228
15229 return DAG.getNode(
15230 ISD::SHL, DL, MVT::i64, SExt,
15231 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15232}
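// Example of the second fold: (sra (shl X, 32), 27) on RV64, i.e. C = 5,
// becomes (shl (sext_inreg X, i32), 5); the sext_inreg frequently folds away
// into a preceding W-form instruction, leaving just a single slli.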
15233
15234 // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15235 // the result is used as the condition of a br_cc or select_cc we can invert,
15236// inverting the setcc is free, and Z is 0/1. Caller will invert the
15237// br_cc/select_cc.
15238 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15239 bool IsAnd = Cond.getOpcode() == ISD::AND;
15240 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15241 return SDValue();
15242
15243 if (!Cond.hasOneUse())
15244 return SDValue();
15245
15246 SDValue Setcc = Cond.getOperand(0);
15247 SDValue Xor = Cond.getOperand(1);
15248 // Canonicalize setcc to LHS.
15249 if (Setcc.getOpcode() != ISD::SETCC)
15250 std::swap(Setcc, Xor);
15251 // LHS should be a setcc and RHS should be an xor.
15252 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15253 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15254 return SDValue();
15255
15256 // If the condition is an And, SimplifyDemandedBits may have changed
15257 // (xor Z, 1) to (not Z).
15258 SDValue Xor1 = Xor.getOperand(1);
15259 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15260 return SDValue();
15261
15262 EVT VT = Cond.getValueType();
15263 SDValue Xor0 = Xor.getOperand(0);
15264
15265 // The LHS of the xor needs to be 0/1.
15266 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15267 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15268 return SDValue();
15269
15270 // We can only invert integer setccs.
15271 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15272 if (!SetCCOpVT.isScalarInteger())
15273 return SDValue();
15274
15275 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15276 if (ISD::isIntEqualitySetCC(CCVal)) {
15277 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15278 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15279 Setcc.getOperand(1), CCVal);
15280 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15281 // Invert (setlt 0, X) by converting to (setlt X, 1).
15282 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15283 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15284 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15285 // Invert (setlt X, 1) by converting to (setlt 0, X).
15286 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15287 DAG.getConstant(0, SDLoc(Setcc), VT),
15288 Setcc.getOperand(0), CCVal);
15289 } else
15290 return SDValue();
15291
15292 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15293 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15294}
15295
15296 // Perform common combines for BR_CC and SELECT_CC conditions.
15297static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15298 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15299 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15300
15301 // Since an arithmetic right shift always preserves the sign bit,
15302 // the shift can be omitted.
15303 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15304 // setge (sra X, N), 0 -> setge X, 0
15305 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15306 LHS.getOpcode() == ISD::SRA) {
15307 LHS = LHS.getOperand(0);
15308 return true;
15309 }
15310
15311 if (!ISD::isIntEqualitySetCC(CCVal))
15312 return false;
15313
15314 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15315 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15316 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15317 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15318 // If we're looking for eq 0 instead of ne 0, we need to invert the
15319 // condition.
15320 bool Invert = CCVal == ISD::SETEQ;
15321 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15322 if (Invert)
15323 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15324
15325 RHS = LHS.getOperand(1);
15326 LHS = LHS.getOperand(0);
15327 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15328
15329 CC = DAG.getCondCode(CCVal);
15330 return true;
15331 }
15332
15333 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15334 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15335 RHS = LHS.getOperand(1);
15336 LHS = LHS.getOperand(0);
15337 return true;
15338 }
15339
15340 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15341 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15342 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15343 SDValue LHS0 = LHS.getOperand(0);
15344 if (LHS0.getOpcode() == ISD::AND &&
15345 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15346 uint64_t Mask = LHS0.getConstantOperandVal(1);
15347 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15348 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15349 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15350 CC = DAG.getCondCode(CCVal);
15351
15352 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15353 LHS = LHS0.getOperand(0);
15354 if (ShAmt != 0)
15355 LHS =
15356 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15357 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15358 return true;
15359 }
15360 }
15361 }
15362
15363 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15364 // This can occur when legalizing some floating point comparisons.
15365 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15366 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15367 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15368 CC = DAG.getCondCode(CCVal);
15369 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15370 return true;
15371 }
15372
15373 if (isNullConstant(RHS)) {
15374 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15375 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15376 CC = DAG.getCondCode(CCVal);
15377 LHS = NewCond;
15378 return true;
15379 }
15380 }
15381
15382 return false;
15383}
15384
15385// Fold
15386// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15387// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15388// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15389// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15390 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15391 SDValue TrueVal, SDValue FalseVal,
15392 bool Swapped) {
15393 bool Commutative = true;
15394 unsigned Opc = TrueVal.getOpcode();
15395 switch (Opc) {
15396 default:
15397 return SDValue();
15398 case ISD::SHL:
15399 case ISD::SRA:
15400 case ISD::SRL:
15401 case ISD::SUB:
15402 Commutative = false;
15403 break;
15404 case ISD::ADD:
15405 case ISD::OR:
15406 case ISD::XOR:
15407 break;
15408 }
15409
15410 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15411 return SDValue();
15412
15413 unsigned OpToFold;
15414 if (FalseVal == TrueVal.getOperand(0))
15415 OpToFold = 0;
15416 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15417 OpToFold = 1;
15418 else
15419 return SDValue();
15420
15421 EVT VT = N->getValueType(0);
15422 SDLoc DL(N);
15423 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15424 EVT OtherOpVT = OtherOp.getValueType();
15425 SDValue IdentityOperand =
15426 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15427 if (!Commutative)
15428 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15429 assert(IdentityOperand && "No identity operand!");
15430
15431 if (Swapped)
15432 std::swap(OtherOp, IdentityOperand);
15433 SDValue NewSel =
15434 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15435 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15436}
15437
15438// This tries to get rid of `select` and `icmp` that are being used to handle
15439// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15440 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15441 SDValue Cond = N->getOperand(0);
15442
15443 // This represents either CTTZ or CTLZ instruction.
15444 SDValue CountZeroes;
15445
15446 SDValue ValOnZero;
15447
15448 if (Cond.getOpcode() != ISD::SETCC)
15449 return SDValue();
15450
15451 if (!isNullConstant(Cond->getOperand(1)))
15452 return SDValue();
15453
15454 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15455 if (CCVal == ISD::CondCode::SETEQ) {
15456 CountZeroes = N->getOperand(2);
15457 ValOnZero = N->getOperand(1);
15458 } else if (CCVal == ISD::CondCode::SETNE) {
15459 CountZeroes = N->getOperand(1);
15460 ValOnZero = N->getOperand(2);
15461 } else {
15462 return SDValue();
15463 }
15464
15465 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15466 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15467 CountZeroes = CountZeroes.getOperand(0);
15468
15469 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15470 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15471 CountZeroes.getOpcode() != ISD::CTLZ &&
15472 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15473 return SDValue();
15474
15475 if (!isNullConstant(ValOnZero))
15476 return SDValue();
15477
15478 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15479 if (Cond->getOperand(0) != CountZeroesArgument)
15480 return SDValue();
15481
15482 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15483 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15484 CountZeroes.getValueType(), CountZeroesArgument);
15485 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15486 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15487 CountZeroes.getValueType(), CountZeroesArgument);
15488 }
15489
15490 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15491 SDValue BitWidthMinusOne =
15492 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15493
15494 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15495 CountZeroes, BitWidthMinusOne);
15496 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15497}
15498
15499 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15500 const RISCVSubtarget &Subtarget) {
15501 SDValue Cond = N->getOperand(0);
15502 SDValue True = N->getOperand(1);
15503 SDValue False = N->getOperand(2);
15504 SDLoc DL(N);
15505 EVT VT = N->getValueType(0);
15506 EVT CondVT = Cond.getValueType();
15507
15508 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15509 return SDValue();
15510
15511 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15512 // BEXTI, where C is power of 2.
15513 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15514 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15515 SDValue LHS = Cond.getOperand(0);
15516 SDValue RHS = Cond.getOperand(1);
15517 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15518 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15519 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15520 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15521 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15522 return DAG.getSelect(DL, VT,
15523 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15524 False, True);
15525 }
15526 }
15527 return SDValue();
15528}
15529
15530 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15531 const RISCVSubtarget &Subtarget) {
15532 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15533 return Folded;
15534
15535 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15536 return V;
15537
15538 if (Subtarget.hasConditionalMoveFusion())
15539 return SDValue();
15540
15541 SDValue TrueVal = N->getOperand(1);
15542 SDValue FalseVal = N->getOperand(2);
15543 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15544 return V;
15545 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15546}
15547
15548/// If we have a build_vector where each lane is binop X, C, where C
15549/// is a constant (but not necessarily the same constant on all lanes),
15550/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15551/// We assume that materializing a constant build vector will be no more
15552 /// expensive than performing O(n) binops.
15553 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15554 const RISCVSubtarget &Subtarget,
15555 const RISCVTargetLowering &TLI) {
15556 SDLoc DL(N);
15557 EVT VT = N->getValueType(0);
15558
15559 assert(!VT.isScalableVector() && "unexpected build vector");
15560
15561 if (VT.getVectorNumElements() == 1)
15562 return SDValue();
15563
15564 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15565 if (!TLI.isBinOp(Opcode))
15566 return SDValue();
15567
15568 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15569 return SDValue();
15570
15571 // This BUILD_VECTOR involves an implicit truncation, and sinking
15572 // truncates through binops is non-trivial.
15573 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15574 return SDValue();
15575
15576 SmallVector<SDValue> LHSOps;
15577 SmallVector<SDValue> RHSOps;
15578 for (SDValue Op : N->ops()) {
15579 if (Op.isUndef()) {
15580 // We can't form a divide or remainder from undef.
15581 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15582 return SDValue();
15583
15584 LHSOps.push_back(Op);
15585 RHSOps.push_back(Op);
15586 continue;
15587 }
15588
15589 // TODO: We can handle operations which have a neutral rhs value
15590 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15591 // of profit in a more explicit manner.
15592 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15593 return SDValue();
15594
15595 LHSOps.push_back(Op.getOperand(0));
15596 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15597 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15598 return SDValue();
15599 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15600 // have different LHS and RHS types.
15601 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15602 return SDValue();
15603
15604 RHSOps.push_back(Op.getOperand(1));
15605 }
15606
15607 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15608 DAG.getBuildVector(VT, DL, RHSOps));
15609}
15610
15611 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15612 const RISCVSubtarget &Subtarget,
15613 const RISCVTargetLowering &TLI) {
15614 SDValue InVec = N->getOperand(0);
15615 SDValue InVal = N->getOperand(1);
15616 SDValue EltNo = N->getOperand(2);
15617 SDLoc DL(N);
15618
15619 EVT VT = InVec.getValueType();
15620 if (VT.isScalableVector())
15621 return SDValue();
15622
15623 if (!InVec.hasOneUse())
15624 return SDValue();
15625
15626 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15627 // move the insert_vector_elts into the arms of the binop. Note that
15628 // the new RHS must be a constant.
15629 const unsigned InVecOpcode = InVec->getOpcode();
15630 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15631 InVal.hasOneUse()) {
15632 SDValue InVecLHS = InVec->getOperand(0);
15633 SDValue InVecRHS = InVec->getOperand(1);
15634 SDValue InValLHS = InVal->getOperand(0);
15635 SDValue InValRHS = InVal->getOperand(1);
15636
15637    if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15638      return SDValue();
15639 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15640 return SDValue();
15641 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15642 // have different LHS and RHS types.
15643 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15644 return SDValue();
15645    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15646                              InVecLHS, InValLHS, EltNo);
15647    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15648                              InVecRHS, InValRHS, EltNo);
15649 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15650 }
15651
15652 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15653 // move the insert_vector_elt to the source operand of the concat_vector.
15654 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15655 return SDValue();
15656
15657 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15658 if (!IndexC)
15659 return SDValue();
15660 unsigned Elt = IndexC->getZExtValue();
15661
15662 EVT ConcatVT = InVec.getOperand(0).getValueType();
15663 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15664 return SDValue();
15665 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15666 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15667
15668 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15669 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15670 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15671 ConcatOp, InVal, NewIdx);
15672
15673 SmallVector<SDValue> ConcatOps;
15674 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15675 ConcatOps[ConcatOpIdx] = ConcatOp;
15676 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15677}
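// Illustrative example of the binop arm of the combine above (hypothetical
// values): inserting (add s, 7) at index 2 into (add VecA, ConstVec) becomes
//   (add (insert_vector_elt VecA, s, 2), (insert_vector_elt ConstVec, 7, 2))
// which keeps the RHS as a constant build_vector that is cheap to
// rematerialize.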
15678
15679// If we're concatenating a series of vector loads like
15680// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15681// Then we can turn this into a strided load by widening the vector elements
15682// vlse32 p, stride=n
15683static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15684                                            const RISCVSubtarget &Subtarget,
15685 const RISCVTargetLowering &TLI) {
15686 SDLoc DL(N);
15687 EVT VT = N->getValueType(0);
15688
15689 // Only perform this combine on legal MVTs.
15690 if (!TLI.isTypeLegal(VT))
15691 return SDValue();
15692
15693 // TODO: Potentially extend this to scalable vectors
15694 if (VT.isScalableVector())
15695 return SDValue();
15696
15697 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15698 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15699 !SDValue(BaseLd, 0).hasOneUse())
15700 return SDValue();
15701
15702 EVT BaseLdVT = BaseLd->getValueType(0);
15703
15704 // Go through the loads and check that they're strided
15705  SmallVector<LoadSDNode *> Lds;
15706  Lds.push_back(BaseLd);
15707 Align Align = BaseLd->getAlign();
15708 for (SDValue Op : N->ops().drop_front()) {
15709 auto *Ld = dyn_cast<LoadSDNode>(Op);
15710 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15711 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15712 Ld->getValueType(0) != BaseLdVT)
15713 return SDValue();
15714
15715 Lds.push_back(Ld);
15716
15717 // The common alignment is the most restrictive (smallest) of all the loads
15718 Align = std::min(Align, Ld->getAlign());
15719 }
15720
15721 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15722 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15723 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15724 // If the load ptrs can be decomposed into a common (Base + Index) with a
15725 // common constant stride, then return the constant stride.
15726 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15727 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15728 if (BIO1.equalBaseIndex(BIO2, DAG))
15729 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15730
15731 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15732 SDValue P1 = Ld1->getBasePtr();
15733 SDValue P2 = Ld2->getBasePtr();
15734 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15735 return {{P2.getOperand(1), false}};
15736 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15737 return {{P1.getOperand(1), true}};
15738
15739 return std::nullopt;
15740 };
15741
15742 // Get the distance between the first and second loads
15743 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15744 if (!BaseDiff)
15745 return SDValue();
15746
15747 // Check all the loads are the same distance apart
15748 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15749 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15750 return SDValue();
15751
15752 // TODO: At this point, we've successfully matched a generalized gather
15753 // load. Maybe we should emit that, and then move the specialized
15754 // matchers above and below into a DAG combine?
15755
15756 // Get the widened scalar type, e.g. v4i8 -> i64
15757 unsigned WideScalarBitWidth =
15758 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15759 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15760
15761 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
15762 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15763 if (!TLI.isTypeLegal(WideVecVT))
15764 return SDValue();
15765
15766 // Check that the operation is legal
15767 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15768 return SDValue();
15769
15770 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15771 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15772 ? std::get<SDValue>(StrideVariant)
15773 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15774 Lds[0]->getOffset().getValueType());
15775 if (MustNegateStride)
15776 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15777
15778 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15779 SDValue IntID =
15780 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15781 Subtarget.getXLenVT());
15782
15783 SDValue AllOneMask =
15784 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15785 DAG.getConstant(1, DL, MVT::i1));
15786
15787 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15788 BaseLd->getBasePtr(), Stride, AllOneMask};
15789
15790 uint64_t MemSize;
15791 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15792 ConstStride && ConstStride->getSExtValue() >= 0)
15793 // total size = (elsize * n) + (stride - elsize) * (n-1)
15794 // = elsize + stride * (n-1)
15795 MemSize = WideScalarVT.getSizeInBits() +
15796 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15797 else
15798 // If Stride isn't constant, then we can't know how much it will load
15799    MemSize = MemoryLocation::UnknownSize;
15800
15801  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15802      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15803 Align);
15804
15805 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15806 Ops, WideVecVT, MMO);
15807 for (SDValue Ld : N->ops())
15808 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15809
15810 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15811}
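// Illustrative example of the strided-load combine above (hypothetical
// layout): concatenating four v4i8 loads at p, p+16, p+32 and p+48 widens
// each lane group to an i32, giving a v4i32 strided load from p with stride
// 16 that is finally bitcast back to v16i8.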
15812
15813static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15814                               const RISCVSubtarget &Subtarget) {
15815
15816 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15817
15818 if (N->getValueType(0).isFixedLengthVector())
15819 return SDValue();
15820
15821 SDValue Addend = N->getOperand(0);
15822 SDValue MulOp = N->getOperand(1);
15823
15824 if (N->getOpcode() == RISCVISD::ADD_VL) {
15825 SDValue AddMergeOp = N->getOperand(2);
15826 if (!AddMergeOp.isUndef())
15827 return SDValue();
15828 }
15829
15830 auto IsVWMulOpc = [](unsigned Opc) {
15831 switch (Opc) {
15832 case RISCVISD::VWMUL_VL:
15833    case RISCVISD::VWMULU_VL:
15834    case RISCVISD::VWMULSU_VL:
15835      return true;
15836 default:
15837 return false;
15838 }
15839 };
15840
15841 if (!IsVWMulOpc(MulOp.getOpcode()))
15842 std::swap(Addend, MulOp);
15843
15844 if (!IsVWMulOpc(MulOp.getOpcode()))
15845 return SDValue();
15846
15847 SDValue MulMergeOp = MulOp.getOperand(2);
15848
15849 if (!MulMergeOp.isUndef())
15850 return SDValue();
15851
15852 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15853 const RISCVSubtarget &Subtarget) {
15854 if (N->getOpcode() == ISD::ADD) {
15855 SDLoc DL(N);
15856 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15857 Subtarget);
15858 }
15859 return std::make_pair(N->getOperand(3), N->getOperand(4));
15860 }(N, DAG, Subtarget);
15861
15862 SDValue MulMask = MulOp.getOperand(3);
15863 SDValue MulVL = MulOp.getOperand(4);
15864
15865 if (AddMask != MulMask || AddVL != MulVL)
15866 return SDValue();
15867
15868 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15869 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15870 "Unexpected opcode after VWMACC_VL");
15871 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15872 "Unexpected opcode after VWMACC_VL!");
15873 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15874 "Unexpected opcode after VWMUL_VL!");
15875 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15876 "Unexpected opcode after VWMUL_VL!");
15877
15878 SDLoc DL(N);
15879 EVT VT = N->getValueType(0);
15880 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15881 AddVL};
15882 return DAG.getNode(Opc, DL, VT, Ops);
15883}
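// Illustrative example of combineToVWMACC above (hypothetical operands):
//   (add_vl acc, (vwmul_vl a, b, undef, mask, vl), undef, mask, vl)
// becomes
//   (vwmacc_vl a, b, acc, mask, vl)
// provided the mask and VL of the add and of the widening multiply agree.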
15884
15885static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15886                                           ISD::MemIndexType &IndexType,
15887                                           RISCVTargetLowering::DAGCombinerInfo &DCI) {
15888  if (!DCI.isBeforeLegalize())
15889 return false;
15890
15891 SelectionDAG &DAG = DCI.DAG;
15892 const MVT XLenVT =
15893 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15894
15895 const EVT IndexVT = Index.getValueType();
15896
15897 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15898 // mode, so anything else must be manually legalized.
15899 if (!isIndexTypeSigned(IndexType))
15900 return false;
15901
15902 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15903 // Any index legalization should first promote to XLenVT, so we don't lose
15904 // bits when scaling. This may create an illegal index type so we let
15905 // LLVM's legalization take care of the splitting.
15906 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15907    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15908                        IndexVT.changeVectorElementType(XLenVT), Index);
15909 }
15910 IndexType = ISD::UNSIGNED_SCALED;
15911 return true;
15912}
15913
15914/// Match the index vector of a scatter or gather node as the shuffle mask
15915/// which performs the rearrangement if possible. Will only match if
15916/// all lanes are touched, and thus replacing the scatter or gather with
15917/// a unit strided access and shuffle is legal.
15918static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15919                                SmallVector<int> &ShuffleMask) {
15920 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15921 return false;
15922  if (Index.getOpcode() != ISD::BUILD_VECTOR)
15923    return false;
15924
15925 const unsigned ElementSize = VT.getScalarStoreSize();
15926 const unsigned NumElems = VT.getVectorNumElements();
15927
15928 // Create the shuffle mask and check all bits active
15929 assert(ShuffleMask.empty());
15930 BitVector ActiveLanes(NumElems);
15931 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15932 // TODO: We've found an active bit of UB, and could be
15933 // more aggressive here if desired.
15934 if (Index->getOperand(i)->isUndef())
15935 return false;
15936 uint64_t C = Index->getConstantOperandVal(i);
15937 if (C % ElementSize != 0)
15938 return false;
15939 C = C / ElementSize;
15940 if (C >= NumElems)
15941 return false;
15942 ShuffleMask.push_back(C);
15943 ActiveLanes.set(C);
15944 }
15945 return ActiveLanes.all();
15946}
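// Illustrative example of matchIndexAsShuffle above (hypothetical gather):
// a v4i32 gather with an all-ones mask and byte-offset index <4, 12, 0, 8>
// touches lanes 1, 3, 0 and 2 exactly once, so it can be rewritten as a
// unit-strided load followed by the shuffle mask <1, 3, 0, 2>.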
15947
15948/// Match the index of a gather or scatter operation as an operation
15949/// with twice the element width and half the number of elements. This is
15950/// generally profitable (if legal) because these operations are linear
15951/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15952/// come out ahead.
15953static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15954                                Align BaseAlign, const RISCVSubtarget &ST) {
15955 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15956 return false;
15957  if (Index.getOpcode() != ISD::BUILD_VECTOR)
15958    return false;
15959
15960  // Attempt a doubling. If we can use an element type 4x or 8x in
15961  // size, this will happen via multiple iterations of the transform.
15962 const unsigned NumElems = VT.getVectorNumElements();
15963 if (NumElems % 2 != 0)
15964 return false;
15965
15966 const unsigned ElementSize = VT.getScalarStoreSize();
15967 const unsigned WiderElementSize = ElementSize * 2;
15968 if (WiderElementSize > ST.getELen()/8)
15969 return false;
15970
15971 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15972 return false;
15973
15974 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15975 // TODO: We've found an active bit of UB, and could be
15976 // more aggressive here if desired.
15977 if (Index->getOperand(i)->isUndef())
15978 return false;
15979 // TODO: This offset check is too strict if we support fully
15980 // misaligned memory operations.
15981 uint64_t C = Index->getConstantOperandVal(i);
15982 if (i % 2 == 0) {
15983 if (C % WiderElementSize != 0)
15984 return false;
15985 continue;
15986 }
15987 uint64_t Last = Index->getConstantOperandVal(i-1);
15988 if (C != Last + ElementSize)
15989 return false;
15990 }
15991 return true;
15992}
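// Illustrative example of matchIndexAsWiderOp above (hypothetical gather):
// a v8i32 gather with byte-offset index <0, 4, 16, 20, 32, 36, 48, 52> reads
// adjacent i32 pairs, so it can be treated as a v4i64 gather with index
// <0, 16, 32, 48>, halving the number of indexed accesses.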
15993
15994
15995SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15996                                               DAGCombinerInfo &DCI) const {
15997 SelectionDAG &DAG = DCI.DAG;
15998 const MVT XLenVT = Subtarget.getXLenVT();
15999 SDLoc DL(N);
16000
16001 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16002 // bits are demanded. N will be added to the Worklist if it was not deleted.
16003 // Caller should return SDValue(N, 0) if this returns true.
16004 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16005 SDValue Op = N->getOperand(OpNo);
16006 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16007 if (!SimplifyDemandedBits(Op, Mask, DCI))
16008 return false;
16009
16010 if (N->getOpcode() != ISD::DELETED_NODE)
16011 DCI.AddToWorklist(N);
16012 return true;
16013 };
16014
16015 switch (N->getOpcode()) {
16016 default:
16017 break;
16018 case RISCVISD::SplitF64: {
16019 SDValue Op0 = N->getOperand(0);
16020 // If the input to SplitF64 is just BuildPairF64 then the operation is
16021 // redundant. Instead, use BuildPairF64's operands directly.
16022 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16023 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16024
16025 if (Op0->isUndef()) {
16026 SDValue Lo = DAG.getUNDEF(MVT::i32);
16027 SDValue Hi = DAG.getUNDEF(MVT::i32);
16028 return DCI.CombineTo(N, Lo, Hi);
16029 }
16030
16031 // It's cheaper to materialise two 32-bit integers than to load a double
16032 // from the constant pool and transfer it to integer registers through the
16033 // stack.
16034 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16035 APInt V = C->getValueAPF().bitcastToAPInt();
16036 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16037 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16038 return DCI.CombineTo(N, Lo, Hi);
16039 }
16040
16041 // This is a target-specific version of a DAGCombine performed in
16042 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16043 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16044 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16045 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16046 !Op0.getNode()->hasOneUse())
16047 break;
16048 SDValue NewSplitF64 =
16049 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16050 Op0.getOperand(0));
16051 SDValue Lo = NewSplitF64.getValue(0);
16052 SDValue Hi = NewSplitF64.getValue(1);
16053 APInt SignBit = APInt::getSignMask(32);
16054 if (Op0.getOpcode() == ISD::FNEG) {
16055 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16056 DAG.getConstant(SignBit, DL, MVT::i32));
16057 return DCI.CombineTo(N, Lo, NewHi);
16058 }
16059 assert(Op0.getOpcode() == ISD::FABS);
16060 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16061 DAG.getConstant(~SignBit, DL, MVT::i32));
16062 return DCI.CombineTo(N, Lo, NewHi);
16063 }
16064 case RISCVISD::SLLW:
16065 case RISCVISD::SRAW:
16066 case RISCVISD::SRLW:
16067 case RISCVISD::RORW:
16068 case RISCVISD::ROLW: {
16069 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16070 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16071 SimplifyDemandedLowBitsHelper(1, 5))
16072 return SDValue(N, 0);
16073
16074 break;
16075 }
16076 case RISCVISD::CLZW:
16077 case RISCVISD::CTZW: {
16078 // Only the lower 32 bits of the first operand are read
16079 if (SimplifyDemandedLowBitsHelper(0, 32))
16080 return SDValue(N, 0);
16081 break;
16082 }
16083  case RISCVISD::FMV_W_X_RV64: {
16084    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16085 // conversion is unnecessary and can be replaced with the
16086 // FMV_X_ANYEXTW_RV64 operand.
16087 SDValue Op0 = N->getOperand(0);
16088    if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16089      return Op0.getOperand(0);
16090 break;
16091 }
16092  case RISCVISD::FMV_X_ANYEXTH:
16093  case RISCVISD::FMV_X_ANYEXTW_RV64: {
16094    SDLoc DL(N);
16095 SDValue Op0 = N->getOperand(0);
16096 MVT VT = N->getSimpleValueType(0);
16097 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16098 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16099 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16100 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16101 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16102 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16103 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16104 assert(Op0.getOperand(0).getValueType() == VT &&
16105 "Unexpected value type!");
16106 return Op0.getOperand(0);
16107 }
16108
16109 // This is a target-specific version of a DAGCombine performed in
16110 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16111 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16112 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16113 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16114 !Op0.getNode()->hasOneUse())
16115 break;
16116 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16117 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16118 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16119 if (Op0.getOpcode() == ISD::FNEG)
16120 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16121 DAG.getConstant(SignBit, DL, VT));
16122
16123 assert(Op0.getOpcode() == ISD::FABS);
16124 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16125 DAG.getConstant(~SignBit, DL, VT));
16126 }
16127 case ISD::ABS: {
16128 EVT VT = N->getValueType(0);
16129 SDValue N0 = N->getOperand(0);
16130 // abs (sext) -> zext (abs)
16131 // abs (zext) -> zext (handled elsewhere)
16132 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16133 SDValue Src = N0.getOperand(0);
16134 SDLoc DL(N);
16135 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16136 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16137 }
16138 break;
16139 }
16140 case ISD::ADD: {
16141 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16142 return V;
16143 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16144 return V;
16145 return performADDCombine(N, DAG, Subtarget);
16146 }
16147 case ISD::SUB: {
16148 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16149 return V;
16150 return performSUBCombine(N, DAG, Subtarget);
16151 }
16152 case ISD::AND:
16153 return performANDCombine(N, DCI, Subtarget);
16154 case ISD::OR: {
16155 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16156 return V;
16157 return performORCombine(N, DCI, Subtarget);
16158 }
16159 case ISD::XOR:
16160 return performXORCombine(N, DAG, Subtarget);
16161 case ISD::MUL:
16162 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16163 return V;
16164 return performMULCombine(N, DAG, DCI, Subtarget);
16165 case ISD::SDIV:
16166 case ISD::UDIV:
16167 case ISD::SREM:
16168 case ISD::UREM:
16169 if (SDValue V = combineBinOpOfZExt(N, DAG))
16170 return V;
16171 break;
16172 case ISD::FADD:
16173 case ISD::UMAX:
16174 case ISD::UMIN:
16175 case ISD::SMAX:
16176 case ISD::SMIN:
16177 case ISD::FMAXNUM:
16178 case ISD::FMINNUM: {
16179 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16180 return V;
16181 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16182 return V;
16183 return SDValue();
16184 }
16185 case ISD::SETCC:
16186 return performSETCCCombine(N, DAG, Subtarget);
16187  case ISD::SIGN_EXTEND_INREG:
16188    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16189 case ISD::ZERO_EXTEND:
16190 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16191 // type legalization. This is safe because fp_to_uint produces poison if
16192 // it overflows.
16193 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16194 SDValue Src = N->getOperand(0);
16195 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16196 isTypeLegal(Src.getOperand(0).getValueType()))
16197 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16198 Src.getOperand(0));
16199 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16200 isTypeLegal(Src.getOperand(1).getValueType())) {
16201 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16202 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16203 Src.getOperand(0), Src.getOperand(1));
16204 DCI.CombineTo(N, Res);
16205 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16206 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16207 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16208 }
16209 }
16210 return SDValue();
16211  case RISCVISD::TRUNCATE_VECTOR_VL: {
16212    // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16213    // This is beneficial when X and Y are both low-precision vectors of the
16214    // same value type. Since the truncate would be lowered into
16215 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16216 // restriction, such pattern would be expanded into a series of "vsetvli"
16217 // and "vnsrl" instructions later to reach this point.
16218 auto IsTruncNode = [](SDValue V) {
16219 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16220 return false;
16221 SDValue VL = V.getOperand(2);
16222 auto *C = dyn_cast<ConstantSDNode>(VL);
16223 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16224 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16225 (isa<RegisterSDNode>(VL) &&
16226 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16227 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16228 IsVLMAXForVMSET;
16229 };
16230
16231 SDValue Op = N->getOperand(0);
16232
16233 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16234 // to distinguish such pattern.
16235 while (IsTruncNode(Op)) {
16236 if (!Op.hasOneUse())
16237 return SDValue();
16238 Op = Op.getOperand(0);
16239 }
16240
16241 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16242 SDValue N0 = Op.getOperand(0);
16243 SDValue N1 = Op.getOperand(1);
16244 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16245 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16246 SDValue N00 = N0.getOperand(0);
16247 SDValue N10 = N1.getOperand(0);
16248 if (N00.getValueType().isVector() &&
16249 N00.getValueType() == N10.getValueType() &&
16250 N->getValueType(0) == N10.getValueType()) {
16251 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16252 SDValue SMin = DAG.getNode(
16253 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16254 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16255 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16256 }
16257 }
16258 }
16259 break;
16260 }
16261 case ISD::TRUNCATE:
16262 return performTRUNCATECombine(N, DAG, Subtarget);
16263 case ISD::SELECT:
16264 return performSELECTCombine(N, DAG, Subtarget);
16265  case RISCVISD::CZERO_EQZ:
16266  case RISCVISD::CZERO_NEZ: {
16267 SDValue Val = N->getOperand(0);
16268 SDValue Cond = N->getOperand(1);
16269
16270 unsigned Opc = N->getOpcode();
16271
16272 // czero_eqz x, x -> x
16273 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16274 return Val;
16275
16276 unsigned InvOpc =
16277        Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16278
16279 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16280 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16281 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16282 SDValue NewCond = Cond.getOperand(0);
16283 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16284 if (DAG.MaskedValueIsZero(NewCond, Mask))
16285 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16286 }
16287 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16288 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16289 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16290 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16291 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16292 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16293 if (ISD::isIntEqualitySetCC(CCVal))
16294 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16295 N->getValueType(0), Val, Cond.getOperand(0));
16296 }
16297 return SDValue();
16298 }
16299 case RISCVISD::SELECT_CC: {
16300 // Transform
16301 SDValue LHS = N->getOperand(0);
16302 SDValue RHS = N->getOperand(1);
16303 SDValue CC = N->getOperand(2);
16304 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16305 SDValue TrueV = N->getOperand(3);
16306 SDValue FalseV = N->getOperand(4);
16307 SDLoc DL(N);
16308 EVT VT = N->getValueType(0);
16309
16310 // If the True and False values are the same, we don't need a select_cc.
16311 if (TrueV == FalseV)
16312 return TrueV;
16313
16314 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16315 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
16316 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16317 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16318 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16319 if (CCVal == ISD::CondCode::SETGE)
16320 std::swap(TrueV, FalseV);
16321
16322 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16323 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16324      // Only handle simm12; if a constant is outside this range, it can be
16325      // treated as a register operand instead.
16326 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16327 isInt<12>(TrueSImm - FalseSImm)) {
16328 SDValue SRA =
16329 DAG.getNode(ISD::SRA, DL, VT, LHS,
16330 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16331 SDValue AND =
16332 DAG.getNode(ISD::AND, DL, VT, SRA,
16333 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16334 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16335 }
16336
16337 if (CCVal == ISD::CondCode::SETGE)
16338 std::swap(TrueV, FalseV);
16339 }
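    // Worked example of the fold above (hypothetical constants): for
    // (select (x < 0), 3, 7) on RV64, the SRA produces 0 or -1, so
    //   sra = x >> 63; and = sra & (3 - 7); res = and + 7
    // yields 3 when x < 0 and 7 otherwise, without a branch.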
16340
16341 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16342 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16343 {LHS, RHS, CC, TrueV, FalseV});
16344
16345 if (!Subtarget.hasConditionalMoveFusion()) {
16346 // (select c, -1, y) -> -c | y
16347 if (isAllOnesConstant(TrueV)) {
16348 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16349 SDValue Neg = DAG.getNegative(C, DL, VT);
16350 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16351 }
16352 // (select c, y, -1) -> -!c | y
16353 if (isAllOnesConstant(FalseV)) {
16354 SDValue C =
16355 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16356 SDValue Neg = DAG.getNegative(C, DL, VT);
16357 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16358 }
16359
16360 // (select c, 0, y) -> -!c & y
16361 if (isNullConstant(TrueV)) {
16362 SDValue C =
16363 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16364 SDValue Neg = DAG.getNegative(C, DL, VT);
16365 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16366 }
16367 // (select c, y, 0) -> -c & y
16368 if (isNullConstant(FalseV)) {
16369 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16370 SDValue Neg = DAG.getNegative(C, DL, VT);
16371 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16372 }
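      // The four folds above rely on the scalar setcc result c being 0 or 1,
      // so -c is either 0 or all-ones; e.g. (select c, -1, y) equals y when
      // c == 0 and all-ones when c == 1, which is exactly (-c | y).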
16373 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16374 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16375 if (((isOneConstant(FalseV) && LHS == TrueV &&
16376 CCVal == ISD::CondCode::SETNE) ||
16377 (isOneConstant(TrueV) && LHS == FalseV &&
16378 CCVal == ISD::CondCode::SETEQ)) &&
16379          isNullConstant(RHS)) {
16380        // freeze it to be safe.
16381        LHS = DAG.getFreeze(LHS);
16382        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16383        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16384 }
16385 }
16386
16387 // If both true/false are an xor with 1, pull through the select.
16388 // This can occur after op legalization if both operands are setccs that
16389 // require an xor to invert.
16390 // FIXME: Generalize to other binary ops with identical operand?
16391 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16392 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16393 isOneConstant(TrueV.getOperand(1)) &&
16394 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16395 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16396 TrueV.getOperand(0), FalseV.getOperand(0));
16397 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16398 }
16399
16400 return SDValue();
16401 }
16402 case RISCVISD::BR_CC: {
16403 SDValue LHS = N->getOperand(1);
16404 SDValue RHS = N->getOperand(2);
16405 SDValue CC = N->getOperand(3);
16406 SDLoc DL(N);
16407
16408 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16409 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16410 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16411
16412 return SDValue();
16413 }
16414 case ISD::BITREVERSE:
16415 return performBITREVERSECombine(N, DAG, Subtarget);
16416 case ISD::FP_TO_SINT:
16417 case ISD::FP_TO_UINT:
16418 return performFP_TO_INTCombine(N, DCI, Subtarget);
16419  case ISD::FP_TO_SINT_SAT:
16420  case ISD::FP_TO_UINT_SAT:
16421    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16422 case ISD::FCOPYSIGN: {
16423 EVT VT = N->getValueType(0);
16424 if (!VT.isVector())
16425 break;
16426 // There is a form of VFSGNJ which injects the negated sign of its second
16427 // operand. Try and bubble any FNEG up after the extend/round to produce
16428    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16429    // TRUNC=1.
16430 SDValue In2 = N->getOperand(1);
16431 // Avoid cases where the extend/round has multiple uses, as duplicating
16432 // those is typically more expensive than removing a fneg.
16433 if (!In2.hasOneUse())
16434 break;
16435 if (In2.getOpcode() != ISD::FP_EXTEND &&
16436 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16437 break;
16438 In2 = In2.getOperand(0);
16439 if (In2.getOpcode() != ISD::FNEG)
16440 break;
16441 SDLoc DL(N);
16442 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16443 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16444 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16445 }
16446 case ISD::MGATHER: {
16447 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16448 const EVT VT = N->getValueType(0);
16449 SDValue Index = MGN->getIndex();
16450 SDValue ScaleOp = MGN->getScale();
16451 ISD::MemIndexType IndexType = MGN->getIndexType();
16452 assert(!MGN->isIndexScaled() &&
16453 "Scaled gather/scatter should not be formed");
16454
16455 SDLoc DL(N);
16456 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16457 return DAG.getMaskedGather(
16458 N->getVTList(), MGN->getMemoryVT(), DL,
16459 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16460 MGN->getBasePtr(), Index, ScaleOp},
16461 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16462
16463 if (narrowIndex(Index, IndexType, DAG))
16464 return DAG.getMaskedGather(
16465 N->getVTList(), MGN->getMemoryVT(), DL,
16466 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16467 MGN->getBasePtr(), Index, ScaleOp},
16468 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16469
16470 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16471 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16472 // The sequence will be XLenVT, not the type of Index. Tell
16473 // isSimpleVIDSequence this so we avoid overflow.
16474 if (std::optional<VIDSequence> SimpleVID =
16475 isSimpleVIDSequence(Index, Subtarget.getXLen());
16476 SimpleVID && SimpleVID->StepDenominator == 1) {
16477 const int64_t StepNumerator = SimpleVID->StepNumerator;
16478 const int64_t Addend = SimpleVID->Addend;
16479
16480 // Note: We don't need to check alignment here since (by assumption
16481        // from the existence of the gather), our offsets must be sufficiently
16482 // aligned.
16483
16484 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16485 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16486 assert(IndexType == ISD::UNSIGNED_SCALED);
16487 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16488 DAG.getConstant(Addend, DL, PtrVT));
16489
16490 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16491 SDValue IntID =
16492 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16493 XLenVT);
16494 SDValue Ops[] =
16495 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16496 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16497        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16498                                       Ops, VT, MGN->getMemOperand());
16499 }
16500 }
16501
16502 SmallVector<int> ShuffleMask;
16503 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16504 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16505 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16506 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16507 MGN->getMask(), DAG.getUNDEF(VT),
16508 MGN->getMemoryVT(), MGN->getMemOperand(),
16509                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
16510      SDValue Shuffle =
16511 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16512 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16513 }
16514
16515 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16516 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16517 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16518 SmallVector<SDValue> NewIndices;
16519 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16520 NewIndices.push_back(Index.getOperand(i));
16521 EVT IndexVT = Index.getValueType()
16522                        .getHalfNumVectorElementsVT(*DAG.getContext());
16523      Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16524
16525 unsigned ElementSize = VT.getScalarStoreSize();
16526 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16527 auto EltCnt = VT.getVectorElementCount();
16528 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16529 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16530 EltCnt.divideCoefficientBy(2));
16531 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16532 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16533 EltCnt.divideCoefficientBy(2));
16534 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16535
16536 SDValue Gather =
16537 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16538 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16539 Index, ScaleOp},
16540 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16541 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16542 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16543 }
16544 break;
16545 }
16546 case ISD::MSCATTER:{
16547 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16548 SDValue Index = MSN->getIndex();
16549 SDValue ScaleOp = MSN->getScale();
16550 ISD::MemIndexType IndexType = MSN->getIndexType();
16551 assert(!MSN->isIndexScaled() &&
16552 "Scaled gather/scatter should not be formed");
16553
16554 SDLoc DL(N);
16555 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16556 return DAG.getMaskedScatter(
16557 N->getVTList(), MSN->getMemoryVT(), DL,
16558 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16559 Index, ScaleOp},
16560 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16561
16562 if (narrowIndex(Index, IndexType, DAG))
16563 return DAG.getMaskedScatter(
16564 N->getVTList(), MSN->getMemoryVT(), DL,
16565 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16566 Index, ScaleOp},
16567 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16568
16569 EVT VT = MSN->getValue()->getValueType(0);
16570 SmallVector<int> ShuffleMask;
16571 if (!MSN->isTruncatingStore() &&
16572 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16573 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16574 DAG.getUNDEF(VT), ShuffleMask);
16575 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16576 DAG.getUNDEF(XLenVT), MSN->getMask(),
16577 MSN->getMemoryVT(), MSN->getMemOperand(),
16578 ISD::UNINDEXED, false);
16579 }
16580 break;
16581 }
16582 case ISD::VP_GATHER: {
16583 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16584 SDValue Index = VPGN->getIndex();
16585 SDValue ScaleOp = VPGN->getScale();
16586 ISD::MemIndexType IndexType = VPGN->getIndexType();
16587 assert(!VPGN->isIndexScaled() &&
16588 "Scaled gather/scatter should not be formed");
16589
16590 SDLoc DL(N);
16591 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16592 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16593 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16594 ScaleOp, VPGN->getMask(),
16595 VPGN->getVectorLength()},
16596 VPGN->getMemOperand(), IndexType);
16597
16598 if (narrowIndex(Index, IndexType, DAG))
16599 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16600 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16601 ScaleOp, VPGN->getMask(),
16602 VPGN->getVectorLength()},
16603 VPGN->getMemOperand(), IndexType);
16604
16605 break;
16606 }
16607 case ISD::VP_SCATTER: {
16608 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16609 SDValue Index = VPSN->getIndex();
16610 SDValue ScaleOp = VPSN->getScale();
16611 ISD::MemIndexType IndexType = VPSN->getIndexType();
16612 assert(!VPSN->isIndexScaled() &&
16613 "Scaled gather/scatter should not be formed");
16614
16615 SDLoc DL(N);
16616 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16617 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16618 {VPSN->getChain(), VPSN->getValue(),
16619 VPSN->getBasePtr(), Index, ScaleOp,
16620 VPSN->getMask(), VPSN->getVectorLength()},
16621 VPSN->getMemOperand(), IndexType);
16622
16623 if (narrowIndex(Index, IndexType, DAG))
16624 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16625 {VPSN->getChain(), VPSN->getValue(),
16626 VPSN->getBasePtr(), Index, ScaleOp,
16627 VPSN->getMask(), VPSN->getVectorLength()},
16628 VPSN->getMemOperand(), IndexType);
16629 break;
16630 }
16631 case RISCVISD::SHL_VL:
16632 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16633 return V;
16634 [[fallthrough]];
16635 case RISCVISD::SRA_VL:
16636 case RISCVISD::SRL_VL: {
16637 SDValue ShAmt = N->getOperand(1);
16638    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16639      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16640 SDLoc DL(N);
16641 SDValue VL = N->getOperand(4);
16642 EVT VT = N->getValueType(0);
16643 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16644 ShAmt.getOperand(1), VL);
16645 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16646 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16647 }
16648 break;
16649 }
16650 case ISD::SRA:
16651 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16652 return V;
16653 [[fallthrough]];
16654 case ISD::SRL:
16655 case ISD::SHL: {
16656 if (N->getOpcode() == ISD::SHL) {
16657 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16658 return V;
16659 }
16660 SDValue ShAmt = N->getOperand(1);
16661    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16662      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16663 SDLoc DL(N);
16664 EVT VT = N->getValueType(0);
16665 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16666 ShAmt.getOperand(1),
16667 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16668 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16669 }
16670 break;
16671 }
16672 case RISCVISD::ADD_VL:
16673 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16674 return V;
16675 return combineToVWMACC(N, DAG, Subtarget);
16676  case RISCVISD::VWADD_W_VL:
16677  case RISCVISD::VWADDU_W_VL:
16678  case RISCVISD::VWSUB_W_VL:
16679  case RISCVISD::VWSUBU_W_VL:
16680    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16681 case RISCVISD::SUB_VL:
16682 case RISCVISD::MUL_VL:
16683 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16684  case RISCVISD::VFMADD_VL:
16685  case RISCVISD::VFNMADD_VL:
16686  case RISCVISD::VFMSUB_VL:
16687  case RISCVISD::VFNMSUB_VL:
16688  case RISCVISD::STRICT_VFMADD_VL:
16689  case RISCVISD::STRICT_VFNMADD_VL:
16690  case RISCVISD::STRICT_VFMSUB_VL:
16691  case RISCVISD::STRICT_VFNMSUB_VL:
16692    return performVFMADD_VLCombine(N, DAG, Subtarget);
16693 case RISCVISD::FADD_VL:
16694 case RISCVISD::FSUB_VL:
16695 case RISCVISD::FMUL_VL:
16696  case RISCVISD::VFWADD_W_VL:
16697  case RISCVISD::VFWSUB_W_VL: {
16698 if (N->getValueType(0).isScalableVector() &&
16699 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16700 (Subtarget.hasVInstructionsF16Minimal() &&
16701 !Subtarget.hasVInstructionsF16()))
16702 return SDValue();
16703 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16704 }
16705 case ISD::LOAD:
16706 case ISD::STORE: {
16707 if (DCI.isAfterLegalizeDAG())
16708 if (SDValue V = performMemPairCombine(N, DCI))
16709 return V;
16710
16711 if (N->getOpcode() != ISD::STORE)
16712 break;
16713
16714 auto *Store = cast<StoreSDNode>(N);
16715 SDValue Chain = Store->getChain();
16716 EVT MemVT = Store->getMemoryVT();
16717 SDValue Val = Store->getValue();
16718 SDLoc DL(N);
16719
16720 bool IsScalarizable =
16721 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16722 Store->isSimple() &&
16723 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16724 isPowerOf2_64(MemVT.getSizeInBits()) &&
16725 MemVT.getSizeInBits() <= Subtarget.getXLen();
16726
16727 // If sufficiently aligned we can scalarize stores of constant vectors of
16728 // any power-of-two size up to XLen bits, provided that they aren't too
16729 // expensive to materialize.
16730 // vsetivli zero, 2, e8, m1, ta, ma
16731 // vmv.v.i v8, 4
16732 // vse64.v v8, (a0)
16733 // ->
16734 // li a1, 1028
16735 // sh a1, 0(a0)
16736 if (DCI.isBeforeLegalize() && IsScalarizable &&
16737        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16738      // Get the constant vector bits
16739 APInt NewC(Val.getValueSizeInBits(), 0);
16740 uint64_t EltSize = Val.getScalarValueSizeInBits();
16741 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16742 if (Val.getOperand(i).isUndef())
16743 continue;
16744 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16745 i * EltSize);
16746 }
16747 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16748
16749 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16750 true) <= 2 &&
16751          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16752                                         NewVT, *Store->getMemOperand())) {
16753 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16754 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16755 Store->getPointerInfo(), Store->getOriginalAlign(),
16756 Store->getMemOperand()->getFlags());
16757 }
16758 }
16759
16760 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16761 // vsetivli zero, 2, e16, m1, ta, ma
16762 // vle16.v v8, (a0)
16763 // vse16.v v8, (a1)
16764 if (auto *L = dyn_cast<LoadSDNode>(Val);
16765 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16766 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16767 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16768 L->getMemoryVT() == MemVT) {
16769 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16770      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16771                                         NewVT, *Store->getMemOperand()) &&
16772          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16773                                         NewVT, *L->getMemOperand())) {
16774 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16775 L->getPointerInfo(), L->getOriginalAlign(),
16776 L->getMemOperand()->getFlags());
16777 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16778 Store->getPointerInfo(), Store->getOriginalAlign(),
16779 Store->getMemOperand()->getFlags());
16780 }
16781 }
16782
16783 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16784 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16785 // any illegal types.
16786 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16787 (DCI.isAfterLegalizeDAG() &&
16788         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16789         isNullConstant(Val.getOperand(1)))) {
16790 SDValue Src = Val.getOperand(0);
16791 MVT VecVT = Src.getSimpleValueType();
16792 // VecVT should be scalable and memory VT should match the element type.
16793 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16794 MemVT == VecVT.getVectorElementType()) {
16795 SDLoc DL(N);
16796 MVT MaskVT = getMaskTypeFor(VecVT);
16797 return DAG.getStoreVP(
16798 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16799 DAG.getConstant(1, DL, MaskVT),
16800 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16801 Store->getMemOperand(), Store->getAddressingMode(),
16802 Store->isTruncatingStore(), /*IsCompress*/ false);
16803 }
16804 }
16805
16806 break;
16807 }
16808 case ISD::SPLAT_VECTOR: {
16809 EVT VT = N->getValueType(0);
16810 // Only perform this combine on legal MVT types.
16811 if (!isTypeLegal(VT))
16812 break;
16813 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16814 DAG, Subtarget))
16815 return Gather;
16816 break;
16817 }
16818 case ISD::BUILD_VECTOR:
16819 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16820 return V;
16821 break;
16822  case ISD::CONCAT_VECTORS:
16823    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16824 return V;
16825 break;
16826  case ISD::INSERT_VECTOR_ELT:
16827    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16828 return V;
16829 break;
16830 case RISCVISD::VFMV_V_F_VL: {
16831 const MVT VT = N->getSimpleValueType(0);
16832 SDValue Passthru = N->getOperand(0);
16833 SDValue Scalar = N->getOperand(1);
16834 SDValue VL = N->getOperand(2);
16835
16836 // If VL is 1, we can use vfmv.s.f.
16837 if (isOneConstant(VL))
16838 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16839 break;
16840 }
16841 case RISCVISD::VMV_V_X_VL: {
16842 const MVT VT = N->getSimpleValueType(0);
16843 SDValue Passthru = N->getOperand(0);
16844 SDValue Scalar = N->getOperand(1);
16845 SDValue VL = N->getOperand(2);
16846
16847 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16848 // scalar input.
16849 unsigned ScalarSize = Scalar.getValueSizeInBits();
16850 unsigned EltWidth = VT.getScalarSizeInBits();
16851 if (ScalarSize > EltWidth && Passthru.isUndef())
16852 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16853 return SDValue(N, 0);
16854
16855 // If VL is 1 and the scalar value won't benefit from immediate, we can
16856 // use vmv.s.x.
16857 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16858 if (isOneConstant(VL) &&
16859 (!Const || Const->isZero() ||
16860 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16861 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16862
16863 break;
16864 }
16865 case RISCVISD::VFMV_S_F_VL: {
16866 SDValue Src = N->getOperand(1);
16867 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16868 // into an undef vector.
16869 // TODO: Could use a vslide or vmv.v.v for non-undef.
16870 if (N->getOperand(0).isUndef() &&
16871 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16872 isNullConstant(Src.getOperand(1)) &&
16873 Src.getOperand(0).getValueType().isScalableVector()) {
16874 EVT VT = N->getValueType(0);
16875 EVT SrcVT = Src.getOperand(0).getValueType();
16877 // Widths match, just return the original vector.
16878 if (SrcVT == VT)
16879 return Src.getOperand(0);
16880 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16881 }
16882 [[fallthrough]];
16883 }
16884 case RISCVISD::VMV_S_X_VL: {
16885 const MVT VT = N->getSimpleValueType(0);
16886 SDValue Passthru = N->getOperand(0);
16887 SDValue Scalar = N->getOperand(1);
16888 SDValue VL = N->getOperand(2);
16889
16890 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
16891 Scalar.getOperand(0).getValueType() == N->getValueType(0))
16892 return Scalar.getOperand(0);
16893
16894 // Use M1 or smaller to avoid over constraining register allocation
16895 const MVT M1VT = getLMUL1VT(VT);
16896 if (M1VT.bitsLT(VT)) {
16897 SDValue M1Passthru =
16898 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16899 DAG.getVectorIdxConstant(0, DL));
16900 SDValue Result =
16901 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16902 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16903 DAG.getVectorIdxConstant(0, DL));
16904 return Result;
16905 }
16906
16907 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16908 // higher would involve overly constraining the register allocator for
16909 // no purpose.
16910 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16911 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16912 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16913 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16914
16915 break;
16916 }
16917 case RISCVISD::VMV_X_S: {
16918 SDValue Vec = N->getOperand(0);
16919 MVT VecVT = N->getOperand(0).getSimpleValueType();
16920 const MVT M1VT = getLMUL1VT(VecVT);
16921 if (M1VT.bitsLT(VecVT)) {
16922 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16923 DAG.getVectorIdxConstant(0, DL));
16924 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16925 }
16926 break;
16927 }
16928  case ISD::INTRINSIC_VOID:
16929  case ISD::INTRINSIC_W_CHAIN:
16930  case ISD::INTRINSIC_WO_CHAIN: {
16931    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16932 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16933 switch (IntNo) {
16934 // By default we do not combine any intrinsic.
16935 default:
16936 return SDValue();
16937 case Intrinsic::riscv_masked_strided_load: {
16938 MVT VT = N->getSimpleValueType(0);
16939 auto *Load = cast<MemIntrinsicSDNode>(N);
16940 SDValue PassThru = N->getOperand(2);
16941 SDValue Base = N->getOperand(3);
16942 SDValue Stride = N->getOperand(4);
16943 SDValue Mask = N->getOperand(5);
16944
16945 // If the stride is equal to the element size in bytes, we can use
16946 // a masked.load.
16947 const unsigned ElementSize = VT.getScalarStoreSize();
16948 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16949 StrideC && StrideC->getZExtValue() == ElementSize)
16950 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16951 DAG.getUNDEF(XLenVT), Mask, PassThru,
16952 Load->getMemoryVT(), Load->getMemOperand(),
16953                                 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16954      return SDValue();
16955 }
16956 case Intrinsic::riscv_masked_strided_store: {
16957 auto *Store = cast<MemIntrinsicSDNode>(N);
16958 SDValue Value = N->getOperand(2);
16959 SDValue Base = N->getOperand(3);
16960 SDValue Stride = N->getOperand(4);
16961 SDValue Mask = N->getOperand(5);
16962
16963 // If the stride is equal to the element size in bytes, we can use
16964 // a masked.store.
16965 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16966 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16967 StrideC && StrideC->getZExtValue() == ElementSize)
16968 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16969 DAG.getUNDEF(XLenVT), Mask,
16970 Value.getValueType(), Store->getMemOperand(),
16971 ISD::UNINDEXED, false);
16972 return SDValue();
16973 }
16974 case Intrinsic::riscv_vcpop:
16975 case Intrinsic::riscv_vcpop_mask:
16976 case Intrinsic::riscv_vfirst:
16977 case Intrinsic::riscv_vfirst_mask: {
16978 SDValue VL = N->getOperand(2);
16979 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16980 IntNo == Intrinsic::riscv_vfirst_mask)
16981 VL = N->getOperand(3);
16982 if (!isNullConstant(VL))
16983 return SDValue();
16984 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16985 SDLoc DL(N);
16986 EVT VT = N->getValueType(0);
16987 if (IntNo == Intrinsic::riscv_vfirst ||
16988 IntNo == Intrinsic::riscv_vfirst_mask)
16989 return DAG.getConstant(-1, DL, VT);
16990 return DAG.getConstant(0, DL, VT);
16991 }
16992 }
16993 }
16994 case ISD::BITCAST: {
16996 SDValue N0 = N->getOperand(0);
16997 EVT VT = N->getValueType(0);
16998 EVT SrcVT = N0.getValueType();
16999 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17000 // type, widen both sides to avoid a trip through memory.
17001 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17002 VT.isScalarInteger()) {
17003 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17004 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17005 Ops[0] = N0;
17006 SDLoc DL(N);
17007 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17008 N0 = DAG.getBitcast(MVT::i8, N0);
17009 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17010 }
17011
17012 return SDValue();
17013 }
17014 }
17015
17016 return SDValue();
17017}
17018
17019bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17020    EVT XVT, unsigned KeptBits) const {
17021  // For vectors, we don't have a preference.
17022 if (XVT.isVector())
17023 return false;
17024
17025 if (XVT != MVT::i32 && XVT != MVT::i64)
17026 return false;
17027
17028 // We can use sext.w for RV64 or an srai 31 on RV32.
17029 if (KeptBits == 32 || KeptBits == 64)
17030 return true;
17031
17032 // With Zbb we can use sext.h/sext.b.
17033 return Subtarget.hasStdExtZbb() &&
17034 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17035 KeptBits == 16);
17036}
17037
17038bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17039    const SDNode *N, CombineLevel Level) const {
17040 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17041 N->getOpcode() == ISD::SRL) &&
17042 "Expected shift op");
17043
17044 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17045 // materialised in fewer instructions than `(OP _, c1)`:
17046 //
17047 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17048 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
17049 SDValue N0 = N->getOperand(0);
17050 EVT Ty = N0.getValueType();
17051 if (Ty.isScalarInteger() &&
17052 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17053 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17054 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17055 if (C1 && C2) {
17056 const APInt &C1Int = C1->getAPIntValue();
17057 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17058
17059 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17060 // and the combine should happen, to potentially allow further combines
17061 // later.
17062 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17063 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17064 return true;
17065
17066 // We can materialise `c1` in an add immediate, so it's "free", and the
17067 // combine should be prevented.
17068 if (C1Int.getSignificantBits() <= 64 &&
17069          isLegalAddImmediate(C1Int.getSExtValue()))
17070        return false;
17071
17072 // Neither constant will fit into an immediate, so find materialisation
17073 // costs.
17074 int C1Cost =
17075 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17076 /*CompressionCost*/ true);
17077 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17078 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17079 /*CompressionCost*/ true);
17080
17081 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17082 // combine should be prevented.
17083 if (C1Cost < ShiftedC1Cost)
17084 return false;
17085 }
17086 }
17087 return true;
17088}
17089
17090bool RISCVTargetLowering::targetShrinkDemandedConstant(
17091    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17092 TargetLoweringOpt &TLO) const {
17093 // Delay this optimization as late as possible.
17094 if (!TLO.LegalOps)
17095 return false;
17096
17097 EVT VT = Op.getValueType();
17098 if (VT.isVector())
17099 return false;
17100
17101 unsigned Opcode = Op.getOpcode();
17102 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17103 return false;
17104
17105 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17106 if (!C)
17107 return false;
17108
17109 const APInt &Mask = C->getAPIntValue();
17110
17111 // Clear all non-demanded bits initially.
17112 APInt ShrunkMask = Mask & DemandedBits;
17113
17114 // Try to make a smaller immediate by setting undemanded bits.
17115
17116 APInt ExpandedMask = Mask | ~DemandedBits;
17117
17118 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17119 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17120 };
17121 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17122 if (NewMask == Mask)
17123 return true;
17124 SDLoc DL(Op);
17125 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17126 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17127 Op.getOperand(0), NewC);
17128 return TLO.CombineTo(Op, NewOp);
17129 };
17130
17131 // If the shrunk mask fits in sign extended 12 bits, let the target
17132 // independent code apply it.
17133 if (ShrunkMask.isSignedIntN(12))
17134 return false;
17135
17136 // And has a few special cases for zext.
17137 if (Opcode == ISD::AND) {
17138 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17139 // otherwise use SLLI + SRLI.
17140 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17141 if (IsLegalMask(NewMask))
17142 return UseMask(NewMask);
17143
17144 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17145 if (VT == MVT::i64) {
17146 APInt NewMask = APInt(64, 0xffffffff);
17147 if (IsLegalMask(NewMask))
17148 return UseMask(NewMask);
17149 }
17150 }
17151
17152 // For the remaining optimizations, we need to be able to make a negative
17153 // number through a combination of mask and undemanded bits.
17154 if (!ExpandedMask.isNegative())
17155 return false;
17156
17157 // Compute the minimum number of bits needed to represent the negative number.
17158 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17159
17160 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17161 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17162 // If we can't create a simm12, we shouldn't change opaque constants.
17163 APInt NewMask = ShrunkMask;
17164 if (MinSignedBits <= 12)
17165 NewMask.setBitsFrom(11);
17166 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17167 NewMask.setBitsFrom(31);
17168 else
17169 return false;
17170
17171 // Check that our new mask is a subset of the demanded mask.
17172 assert(IsLegalMask(NewMask));
17173 return UseMask(NewMask);
17174}
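// For example (illustrative): shrinking (and X, 0x1FFFF) on RV64 when only the
// low 16 bits are demanded gives ShrunkMask = 0xFFFF, which does not fit a
// simm12 ANDI but is caught by the special case above, so the node is
// rewritten to (and X, 0xFFFF) and can be selected as zext.h when available,
// or slli+srli otherwise.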
17175
17176static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17177 static const uint64_t GREVMasks[] = {
17178 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17179 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17180
17181 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17182 unsigned Shift = 1 << Stage;
17183 if (ShAmt & Shift) {
17184 uint64_t Mask = GREVMasks[Stage];
17185 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17186 if (IsGORC)
17187 Res |= x;
17188 x = Res;
17189 }
17190 }
17191
17192 return x;
17193}
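// For example, with ShAmt == 7 only stages 0-2 fire, so bits only move within
// their byte. As GORC this ORs each byte's bits into every bit of that byte,
// matching orc.b: computeGREVOrGORC(0x0000010000ff0000, 7, /*IsGORC=*/true)
// yields 0x0000ff0000ff0000. As GREV it reverses the bits within each byte,
// matching brev8: a 0x01 byte becomes 0x80.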
17194
17195 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17196 KnownBits &Known,
17197 const APInt &DemandedElts,
17198 const SelectionDAG &DAG,
17199 unsigned Depth) const {
17200 unsigned BitWidth = Known.getBitWidth();
17201 unsigned Opc = Op.getOpcode();
17202 assert((Opc >= ISD::BUILTIN_OP_END ||
17203 Opc == ISD::INTRINSIC_WO_CHAIN ||
17204 Opc == ISD::INTRINSIC_W_CHAIN ||
17205 Opc == ISD::INTRINSIC_VOID) &&
17206 "Should use MaskedValueIsZero if you don't know whether Op"
17207 " is a target node!");
17208
17209 Known.resetAll();
17210 switch (Opc) {
17211 default: break;
17212 case RISCVISD::SELECT_CC: {
17213 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17214 // If we don't know any bits, early out.
17215 if (Known.isUnknown())
17216 break;
17217 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17218
17219 // Only known if known in both the LHS and RHS.
17220 Known = Known.intersectWith(Known2);
17221 break;
17222 }
17223 case RISCVISD::CZERO_EQZ:
17224 case RISCVISD::CZERO_NEZ:
17225 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17226 // Result is either all zero or operand 0. We can propagate zeros, but not
17227 // ones.
17228 Known.One.clearAllBits();
17229 break;
17230 case RISCVISD::REMUW: {
17231 KnownBits Known2;
17232 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17233 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17234 // We only care about the lower 32 bits.
17235 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17236 // Restore the original width by sign extending.
17237 Known = Known.sext(BitWidth);
17238 break;
17239 }
17240 case RISCVISD::DIVUW: {
17241 KnownBits Known2;
17242 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17243 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17244 // We only care about the lower 32 bits.
17245 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17246 // Restore the original width by sign extending.
17247 Known = Known.sext(BitWidth);
17248 break;
17249 }
17250 case RISCVISD::SLLW: {
17251 KnownBits Known2;
17252 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17253 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17254 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17255 // Restore the original width by sign extending.
17256 Known = Known.sext(BitWidth);
17257 break;
17258 }
17259 case RISCVISD::CTZW: {
17260 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17261 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17262 unsigned LowBits = llvm::bit_width(PossibleTZ);
17263 Known.Zero.setBitsFrom(LowBits);
17264 break;
17265 }
17266 case RISCVISD::CLZW: {
17267 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17268 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17269 unsigned LowBits = llvm::bit_width(PossibleLZ);
17270 Known.Zero.setBitsFrom(LowBits);
17271 break;
17272 }
17273 case RISCVISD::BREV8:
17274 case RISCVISD::ORC_B: {
17275 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17276 // control value of 7 is equivalent to brev8 and orc.b.
17277 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17278 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17279 // To compute zeros, we need to invert the value and invert it back after.
17280 Known.Zero =
17281 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17282 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17283 break;
17284 }
17285 case RISCVISD::READ_VLENB: {
17286 // We can use the minimum and maximum VLEN values to bound VLENB. We
17287 // know VLEN must be a power of two.
17288 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17289 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17290 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17291 Known.Zero.setLowBits(Log2_32(MinVLenB));
17292 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17293 if (MaxVLenB == MinVLenB)
17294 Known.One.setBit(Log2_32(MinVLenB));
17295 break;
17296 }
17297 case RISCVISD::FCLASS: {
17298 // fclass will only set one of the low 10 bits.
17299 Known.Zero.setBitsFrom(10);
17300 break;
17301 }
17302 case ISD::INTRINSIC_W_CHAIN:
17303 case ISD::INTRINSIC_WO_CHAIN: {
17304 unsigned IntNo =
17305 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17306 switch (IntNo) {
17307 default:
17308 // We can't do anything for most intrinsics.
17309 break;
17310 case Intrinsic::riscv_vsetvli:
17311 case Intrinsic::riscv_vsetvlimax: {
17312 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17313 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17314 RISCVII::VLMUL VLMUL =
17315 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17316 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17317 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17318 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17319 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17320
17321 // The result of vsetvli must not be larger than AVL.
17322 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17323 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17324
17325 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17326 if (BitWidth > KnownZeroFirstBit)
17327 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17328 break;
17329 }
17330 }
17331 break;
17332 }
17333 }
17334}
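// Illustrative numbers for the vsetvli case above: with SEW=32, LMUL=2 and a
// maximum VLEN of 512 bits, MaxVL = (512 / 32) * 2 = 32, so every bit from
// Log2_32(32) + 1 = 6 upwards of the returned VL is known to be zero.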
17335
17336 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17337 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17338 unsigned Depth) const {
17339 switch (Op.getOpcode()) {
17340 default:
17341 break;
17342 case RISCVISD::SELECT_CC: {
17343 unsigned Tmp =
17344 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17345 if (Tmp == 1) return 1; // Early out.
17346 unsigned Tmp2 =
17347 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17348 return std::min(Tmp, Tmp2);
17349 }
17350 case RISCVISD::CZERO_EQZ:
17351 case RISCVISD::CZERO_NEZ:
17352 // Output is either all zero or operand 0. We can propagate sign bit count
17353 // from operand 0.
17354 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17355 case RISCVISD::ABSW: {
17356 // We expand this at isel to negw+max. The result will have 33 sign bits
17357 // if the input has at least 33 sign bits.
17358 unsigned Tmp =
17359 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17360 if (Tmp < 33) return 1;
17361 return 33;
17362 }
17363 case RISCVISD::SLLW:
17364 case RISCVISD::SRAW:
17365 case RISCVISD::SRLW:
17366 case RISCVISD::DIVW:
17367 case RISCVISD::DIVUW:
17368 case RISCVISD::REMUW:
17369 case RISCVISD::ROLW:
17370 case RISCVISD::RORW:
17371 case RISCVISD::FCVT_W_RV64:
17372 case RISCVISD::FCVT_WU_RV64:
17373 case RISCVISD::STRICT_FCVT_W_RV64:
17374 case RISCVISD::STRICT_FCVT_WU_RV64:
17375 // TODO: As the result is sign-extended, this is conservatively correct. A
17376 // more precise answer could be calculated for SRAW depending on known
17377 // bits in the shift amount.
17378 return 33;
17379 case RISCVISD::VMV_X_S: {
17380 // The number of sign bits of the scalar result is computed by obtaining the
17381 // element type of the input vector operand, subtracting its width from the
17382 // XLEN, and then adding one (sign bit within the element type). If the
17383 // element type is wider than XLen, the least-significant XLEN bits are
17384 // taken.
17385 unsigned XLen = Subtarget.getXLen();
17386 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17387 if (EltBits <= XLen)
17388 return XLen - EltBits + 1;
17389 break;
17390 }
17391 case ISD::INTRINSIC_W_CHAIN: {
17392 unsigned IntNo = Op.getConstantOperandVal(1);
17393 switch (IntNo) {
17394 default:
17395 break;
17396 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17397 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17398 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17399 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17400 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17401 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17402 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17403 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17404 case Intrinsic::riscv_masked_cmpxchg_i64:
17405 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17406 // narrow atomic operation. These are implemented using atomic
17407 // operations at the minimum supported atomicrmw/cmpxchg width whose
17408 // result is then sign extended to XLEN. With +A, the minimum width is
17409 // 32 for both RV64 and RV32.
17410 assert(Subtarget.getXLen() == 64);
17412 assert(Subtarget.hasStdExtA());
17413 return 33;
17414 }
17415 break;
17416 }
17417 }
17418
17419 return 1;
17420}
17421
17422 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17423 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17424 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17425
17426 // TODO: Add more target nodes.
17427 switch (Op.getOpcode()) {
17428 case RISCVISD::SELECT_CC:
17429 // Integer select_cc cannot create poison.
17430 // TODO: What are the FP poison semantics?
17431 // TODO: This instruction blocks poison from the unselected operand, can
17432 // we do anything with that?
17433 return !Op.getValueType().isInteger();
17434 }
17435 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17436 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17437}
17438
17439const Constant *
17440 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17441 assert(Ld && "Unexpected null LoadSDNode");
17442 if (!ISD::isNormalLoad(Ld))
17443 return nullptr;
17444
17445 SDValue Ptr = Ld->getBasePtr();
17446
17447 // Only constant pools with no offset are supported.
17448 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17449 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17450 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17451 CNode->getOffset() != 0)
17452 return nullptr;
17453
17454 return CNode;
17455 };
17456
17457 // Simple case, LLA.
17458 if (Ptr.getOpcode() == RISCVISD::LLA) {
17459 auto *CNode = GetSupportedConstantPool(Ptr);
17460 if (!CNode || CNode->getTargetFlags() != 0)
17461 return nullptr;
17462
17463 return CNode->getConstVal();
17464 }
17465
17466 // Look for a HI and ADD_LO pair.
17467 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17468 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17469 return nullptr;
17470
17471 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17472 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17473
17474 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17475 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17476 return nullptr;
17477
17478 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17479 return nullptr;
17480
17481 return CNodeLo->getConstVal();
17482}
17483
17484 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17485 MachineBasicBlock *BB) {
17486 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17487
17488 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17489 // Should the count have wrapped while it was being read, we need to try
17490 // again.
17491 // For example:
17492 // ```
17493 // read:
17494 // csrrs x3, counterh # load high word of counter
17495 // csrrs x2, counter # load low word of counter
17496 // csrrs x4, counterh # load high word of counter
17497 // bne x3, x4, read # check if high word reads match, otherwise try again
17498 // ```
17499
17500 MachineFunction &MF = *BB->getParent();
17501 const BasicBlock *LLVMBB = BB->getBasicBlock();
17502 MachineFunction::iterator It = ++BB->getIterator();
17503
17504 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17505 MF.insert(It, LoopMBB);
17506
17507 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17508 MF.insert(It, DoneMBB);
17509
17510 // Transfer the remainder of BB and its successor edges to DoneMBB.
17511 DoneMBB->splice(DoneMBB->begin(), BB,
17512 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17513 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17514
17515 BB->addSuccessor(LoopMBB);
17516
17517 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17518 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17519 Register LoReg = MI.getOperand(0).getReg();
17520 Register HiReg = MI.getOperand(1).getReg();
17521 int64_t LoCounter = MI.getOperand(2).getImm();
17522 int64_t HiCounter = MI.getOperand(3).getImm();
17523 DebugLoc DL = MI.getDebugLoc();
17524
17524
17525 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17526 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17527 .addImm(HiCounter)
17528 .addReg(RISCV::X0);
17529 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17530 .addImm(LoCounter)
17531 .addReg(RISCV::X0);
17532 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17533 .addImm(HiCounter)
17534 .addReg(RISCV::X0);
17535
17536 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17537 .addReg(HiReg)
17538 .addReg(ReadAgainReg)
17539 .addMBB(LoopMBB);
17540
17541 LoopMBB->addSuccessor(LoopMBB);
17542 LoopMBB->addSuccessor(DoneMBB);
17543
17544 MI.eraseFromParent();
17545
17546 return DoneMBB;
17547}
17548
17549 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17550 MachineBasicBlock *BB,
17551 const RISCVSubtarget &Subtarget) {
17552 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17553
17554 MachineFunction &MF = *BB->getParent();
17555 DebugLoc DL = MI.getDebugLoc();
17556 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17557 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17558 Register LoReg = MI.getOperand(0).getReg();
17559 Register HiReg = MI.getOperand(1).getReg();
17560 Register SrcReg = MI.getOperand(2).getReg();
17561
17562 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17563 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17564
17565 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17566 RI, Register());
17567 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17568 MachineMemOperand *MMOLo =
17569 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
17570 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17571 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
17572 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17573 .addFrameIndex(FI)
17574 .addImm(0)
17575 .addMemOperand(MMOLo);
17576 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17577 .addFrameIndex(FI)
17578 .addImm(4)
17579 .addMemOperand(MMOHi);
17580 MI.eraseFromParent(); // The pseudo instruction is gone now.
17581 return BB;
17582}
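// Roughly, SplitF64Pseudo ends up as the following sequence (an RV32D sketch;
// registers and the frame-index addressing are for illustration only):
// ```
// fsd fa0, 0(<MoveF64 slot>) # spill the f64 source
// lw a0, 0(<MoveF64 slot>) # low 32 bits
// lw a1, 4(<MoveF64 slot>) # high 32 bits
// ```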
17583
17584 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17585 MachineBasicBlock *BB,
17586 const RISCVSubtarget &Subtarget) {
17587 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17588 "Unexpected instruction");
17589
17590 MachineFunction &MF = *BB->getParent();
17591 DebugLoc DL = MI.getDebugLoc();
17592 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17593 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17594 Register DstReg = MI.getOperand(0).getReg();
17595 Register LoReg = MI.getOperand(1).getReg();
17596 Register HiReg = MI.getOperand(2).getReg();
17597
17598 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17599 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17600
17601 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17602 MachineMemOperand *MMOLo =
17603 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
17604 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17605 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
17606 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17607 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17608 .addFrameIndex(FI)
17609 .addImm(0)
17610 .addMemOperand(MMOLo);
17611 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17612 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17613 .addFrameIndex(FI)
17614 .addImm(4)
17615 .addMemOperand(MMOHi);
17616 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17617 MI.eraseFromParent(); // The pseudo instruction is gone now.
17618 return BB;
17619}
17620
17621 static bool isSelectPseudo(MachineInstr &MI) {
17622 switch (MI.getOpcode()) {
17623 default:
17624 return false;
17625 case RISCV::Select_GPR_Using_CC_GPR:
17626 case RISCV::Select_FPR16_Using_CC_GPR:
17627 case RISCV::Select_FPR16INX_Using_CC_GPR:
17628 case RISCV::Select_FPR32_Using_CC_GPR:
17629 case RISCV::Select_FPR32INX_Using_CC_GPR:
17630 case RISCV::Select_FPR64_Using_CC_GPR:
17631 case RISCV::Select_FPR64INX_Using_CC_GPR:
17632 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17633 return true;
17634 }
17635}
17636
17637 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17638 unsigned RelOpcode, unsigned EqOpcode,
17639 const RISCVSubtarget &Subtarget) {
17640 DebugLoc DL = MI.getDebugLoc();
17641 Register DstReg = MI.getOperand(0).getReg();
17642 Register Src1Reg = MI.getOperand(1).getReg();
17643 Register Src2Reg = MI.getOperand(2).getReg();
17644 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17645 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17646 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17647
17648 // Save the current FFLAGS.
17649 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17650
17651 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17652 .addReg(Src1Reg)
17653 .addReg(Src2Reg);
17654 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17655 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17656
17657 // Restore the FFLAGS.
17658 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17659 .addReg(SavedFFlags, RegState::Kill);
17660
17661 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17662 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17663 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17664 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17665 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17666 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17667
17668 // Erase the pseudoinstruction.
17669 MI.eraseFromParent();
17670 return BB;
17671}
17672
17673static MachineBasicBlock *
17674 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17675 MachineBasicBlock *ThisMBB,
17676 const RISCVSubtarget &Subtarget) {
17677 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
17678 // Without this, custom-inserter would have generated:
17679 //
17680 // A
17681 // | \
17682 // | B
17683 // | /
17684 // C
17685 // | \
17686 // | D
17687 // | /
17688 // E
17689 //
17690 // A: X = ...; Y = ...
17691 // B: empty
17692 // C: Z = PHI [X, A], [Y, B]
17693 // D: empty
17694 // E: PHI [X, C], [Z, D]
17695 //
17696 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17697 //
17698 // A
17699 // | \
17700 // | C
17701 // | /|
17702 // |/ |
17703 // | |
17704 // | D
17705 // | /
17706 // E
17707 //
17708 // A: X = ...; Y = ...
17709 // D: empty
17710 // E: PHI [X, A], [X, C], [Y, D]
17711
17712 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17713 const DebugLoc &DL = First.getDebugLoc();
17714 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17715 MachineFunction *F = ThisMBB->getParent();
17716 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17717 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17718 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17719 MachineFunction::iterator It = ++ThisMBB->getIterator();
17720 F->insert(It, FirstMBB);
17721 F->insert(It, SecondMBB);
17722 F->insert(It, SinkMBB);
17723
17724 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17725 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17726 std::next(MachineBasicBlock::iterator(First)),
17727 ThisMBB->end());
17728 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17729
17730 // Fallthrough block for ThisMBB.
17731 ThisMBB->addSuccessor(FirstMBB);
17732 // Fallthrough block for FirstMBB.
17733 FirstMBB->addSuccessor(SecondMBB);
17734 ThisMBB->addSuccessor(SinkMBB);
17735 FirstMBB->addSuccessor(SinkMBB);
17736 // This is fallthrough.
17737 SecondMBB->addSuccessor(SinkMBB);
17738
17739 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17740 Register FLHS = First.getOperand(1).getReg();
17741 Register FRHS = First.getOperand(2).getReg();
17742 // Insert appropriate branch.
17743 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17744 .addReg(FLHS)
17745 .addReg(FRHS)
17746 .addMBB(SinkMBB);
17747
17748 Register SLHS = Second.getOperand(1).getReg();
17749 Register SRHS = Second.getOperand(2).getReg();
17750 Register Op1Reg4 = First.getOperand(4).getReg();
17751 Register Op1Reg5 = First.getOperand(5).getReg();
17752
17753 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17754 // Insert appropriate branch.
17755 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17756 .addReg(SLHS)
17757 .addReg(SRHS)
17758 .addMBB(SinkMBB);
17759
17760 Register DestReg = Second.getOperand(0).getReg();
17761 Register Op2Reg4 = Second.getOperand(4).getReg();
17762 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17763 .addReg(Op2Reg4)
17764 .addMBB(ThisMBB)
17765 .addReg(Op1Reg4)
17766 .addMBB(FirstMBB)
17767 .addReg(Op1Reg5)
17768 .addMBB(SecondMBB);
17769
17770 // Now remove the Select_FPRX_s.
17771 First.eraseFromParent();
17772 Second.eraseFromParent();
17773 return SinkMBB;
17774}
17775
17776 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17777 MachineBasicBlock *BB,
17778 const RISCVSubtarget &Subtarget) {
17779 // To "insert" Select_* instructions, we actually have to insert the triangle
17780 // control-flow pattern. The incoming instructions know the destination vreg
17781 // to set, the condition code register to branch on, the true/false values to
17782 // select between, and the condcode to use to select the appropriate branch.
17783 //
17784 // We produce the following control flow:
17785 // HeadMBB
17786 // | \
17787 // | IfFalseMBB
17788 // | /
17789 // TailMBB
17790 //
17791 // When we find a sequence of selects we attempt to optimize their emission
17792 // by sharing the control flow. Currently we only handle cases where we have
17793 // multiple selects with the exact same condition (same LHS, RHS and CC).
17794 // The selects may be interleaved with other instructions if the other
17795 // instructions meet some requirements we deem safe:
17796 // - They are not pseudo instructions.
17797 // - They are debug instructions. Otherwise,
17798 // - They do not have side-effects, do not access memory and their inputs do
17799 // not depend on the results of the select pseudo-instructions.
17800 // The TrueV/FalseV operands of the selects cannot depend on the result of
17801 // previous selects in the sequence.
17802 // These conditions could be further relaxed. See the X86 target for a
17803 // related approach and more information.
17804 //
17805 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17806 // is checked here and handled by a separate function -
17807 // EmitLoweredCascadedSelect.
17808 Register LHS = MI.getOperand(1).getReg();
17809 Register RHS = MI.getOperand(2).getReg();
17810 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17811
17812 SmallVector<MachineInstr *, 4> SelectDebugValues;
17813 SmallSet<Register, 4> SelectDests;
17814 SelectDests.insert(MI.getOperand(0).getReg());
17815
17816 MachineInstr *LastSelectPseudo = &MI;
17817 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17818 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17819 Next->getOpcode() == MI.getOpcode() &&
17820 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17821 Next->getOperand(5).isKill()) {
17822 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17823 }
17824
17825 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17826 SequenceMBBI != E; ++SequenceMBBI) {
17827 if (SequenceMBBI->isDebugInstr())
17828 continue;
17829 if (isSelectPseudo(*SequenceMBBI)) {
17830 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17831 SequenceMBBI->getOperand(2).getReg() != RHS ||
17832 SequenceMBBI->getOperand(3).getImm() != CC ||
17833 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17834 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17835 break;
17836 LastSelectPseudo = &*SequenceMBBI;
17837 SequenceMBBI->collectDebugValues(SelectDebugValues);
17838 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17839 continue;
17840 }
17841 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17842 SequenceMBBI->mayLoadOrStore() ||
17843 SequenceMBBI->usesCustomInsertionHook())
17844 break;
17845 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17846 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17847 }))
17848 break;
17849 }
17850
17851 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17852 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17853 DebugLoc DL = MI.getDebugLoc();
17854 MachineFunction::iterator I = ++BB->getIterator();
17855
17856 MachineBasicBlock *HeadMBB = BB;
17857 MachineFunction *F = BB->getParent();
17858 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17859 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17860
17861 F->insert(I, IfFalseMBB);
17862 F->insert(I, TailMBB);
17863
17864 // Transfer debug instructions associated with the selects to TailMBB.
17865 for (MachineInstr *DebugInstr : SelectDebugValues) {
17866 TailMBB->push_back(DebugInstr->removeFromParent());
17867 }
17868
17869 // Move all instructions after the sequence to TailMBB.
17870 TailMBB->splice(TailMBB->end(), HeadMBB,
17871 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17872 // Update machine-CFG edges by transferring all successors of the current
17873 // block to the new block which will contain the Phi nodes for the selects.
17874 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17875 // Set the successors for HeadMBB.
17876 HeadMBB->addSuccessor(IfFalseMBB);
17877 HeadMBB->addSuccessor(TailMBB);
17878
17879 // Insert appropriate branch.
17880 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17881 .addReg(LHS)
17882 .addReg(RHS)
17883 .addMBB(TailMBB);
17884
17885 // IfFalseMBB just falls through to TailMBB.
17886 IfFalseMBB->addSuccessor(TailMBB);
17887
17888 // Create PHIs for all of the select pseudo-instructions.
17889 auto SelectMBBI = MI.getIterator();
17890 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17891 auto InsertionPoint = TailMBB->begin();
17892 while (SelectMBBI != SelectEnd) {
17893 auto Next = std::next(SelectMBBI);
17894 if (isSelectPseudo(*SelectMBBI)) {
17895 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17896 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17897 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17898 .addReg(SelectMBBI->getOperand(4).getReg())
17899 .addMBB(HeadMBB)
17900 .addReg(SelectMBBI->getOperand(5).getReg())
17901 .addMBB(IfFalseMBB);
17902 SelectMBBI->eraseFromParent();
17903 }
17904 SelectMBBI = Next;
17905 }
17906
17907 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17908 return TailMBB;
17909}
17910
17911// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
17912static const RISCV::RISCVMaskedPseudoInfo *
17913lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17914 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17915 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17916 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17917 const RISCV::RISCVMaskedPseudoInfo *Masked =
17918 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17919 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17920 return Masked;
17921}
17922
17923 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17924 MachineBasicBlock *BB,
17925 unsigned CVTXOpc) {
17926 DebugLoc DL = MI.getDebugLoc();
17927
17928 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
17929
17930 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17931 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17932
17933 // Save the old value of FFLAGS.
17934 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17935
17936 assert(MI.getNumOperands() == 7);
17937
17938 // Emit a VFCVT_X_F
17939 const TargetRegisterInfo *TRI =
17940 BB->getParent()->getSubtarget().getRegisterInfo();
17941 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17942 Register Tmp = MRI.createVirtualRegister(RC);
17943 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17944 .add(MI.getOperand(1))
17945 .add(MI.getOperand(2))
17946 .add(MI.getOperand(3))
17947 .add(MachineOperand::CreateImm(7)) // frm = DYN
17948 .add(MI.getOperand(4))
17949 .add(MI.getOperand(5))
17950 .add(MI.getOperand(6))
17951 .add(MachineOperand::CreateReg(RISCV::FRM,
17952 /*IsDef*/ false,
17953 /*IsImp*/ true));
17954
17955 // Emit a VFCVT_F_X
17956 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
17957 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
17958 // There is no E8 variant for VFCVT_F_X.
17959 assert(Log2SEW >= 4);
17960 unsigned CVTFOpc =
17961 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17962 ->MaskedPseudo;
17963
17964 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17965 .add(MI.getOperand(0))
17966 .add(MI.getOperand(1))
17967 .addReg(Tmp)
17968 .add(MI.getOperand(3))
17969 .add(MachineOperand::CreateImm(7)) // frm = DYN
17970 .add(MI.getOperand(4))
17971 .add(MI.getOperand(5))
17972 .add(MI.getOperand(6))
17973 .add(MachineOperand::CreateReg(RISCV::FRM,
17974 /*IsDef*/ false,
17975 /*IsImp*/ true));
17976
17977 // Restore FFLAGS.
17978 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17979 .addReg(SavedFFLAGS, RegState::Kill);
17980
17981 // Erase the pseudoinstruction.
17982 MI.eraseFromParent();
17983 return BB;
17984}
17985
17986 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17987 const RISCVSubtarget &Subtarget) {
17988 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17989 const TargetRegisterClass *RC;
17990 switch (MI.getOpcode()) {
17991 default:
17992 llvm_unreachable("Unexpected opcode");
17993 case RISCV::PseudoFROUND_H:
17994 CmpOpc = RISCV::FLT_H;
17995 F2IOpc = RISCV::FCVT_W_H;
17996 I2FOpc = RISCV::FCVT_H_W;
17997 FSGNJOpc = RISCV::FSGNJ_H;
17998 FSGNJXOpc = RISCV::FSGNJX_H;
17999 RC = &RISCV::FPR16RegClass;
18000 break;
18001 case RISCV::PseudoFROUND_H_INX:
18002 CmpOpc = RISCV::FLT_H_INX;
18003 F2IOpc = RISCV::FCVT_W_H_INX;
18004 I2FOpc = RISCV::FCVT_H_W_INX;
18005 FSGNJOpc = RISCV::FSGNJ_H_INX;
18006 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18007 RC = &RISCV::GPRF16RegClass;
18008 break;
18009 case RISCV::PseudoFROUND_S:
18010 CmpOpc = RISCV::FLT_S;
18011 F2IOpc = RISCV::FCVT_W_S;
18012 I2FOpc = RISCV::FCVT_S_W;
18013 FSGNJOpc = RISCV::FSGNJ_S;
18014 FSGNJXOpc = RISCV::FSGNJX_S;
18015 RC = &RISCV::FPR32RegClass;
18016 break;
18017 case RISCV::PseudoFROUND_S_INX:
18018 CmpOpc = RISCV::FLT_S_INX;
18019 F2IOpc = RISCV::FCVT_W_S_INX;
18020 I2FOpc = RISCV::FCVT_S_W_INX;
18021 FSGNJOpc = RISCV::FSGNJ_S_INX;
18022 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18023 RC = &RISCV::GPRF32RegClass;
18024 break;
18025 case RISCV::PseudoFROUND_D:
18026 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18027 CmpOpc = RISCV::FLT_D;
18028 F2IOpc = RISCV::FCVT_L_D;
18029 I2FOpc = RISCV::FCVT_D_L;
18030 FSGNJOpc = RISCV::FSGNJ_D;
18031 FSGNJXOpc = RISCV::FSGNJX_D;
18032 RC = &RISCV::FPR64RegClass;
18033 break;
18034 case RISCV::PseudoFROUND_D_INX:
18035 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18036 CmpOpc = RISCV::FLT_D_INX;
18037 F2IOpc = RISCV::FCVT_L_D_INX;
18038 I2FOpc = RISCV::FCVT_D_L_INX;
18039 FSGNJOpc = RISCV::FSGNJ_D_INX;
18040 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18041 RC = &RISCV::GPRRegClass;
18042 break;
18043 }
18044
18045 const BasicBlock *BB = MBB->getBasicBlock();
18046 DebugLoc DL = MI.getDebugLoc();
18047 MachineFunction::iterator I = ++MBB->getIterator();
18048
18049 MachineFunction *F = MBB->getParent();
18050 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18051 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18052
18053 F->insert(I, CvtMBB);
18054 F->insert(I, DoneMBB);
18055 // Move all instructions after the sequence to DoneMBB.
18056 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18057 MBB->end());
18058 // Update machine-CFG edges by transferring all successors of the current
18059 // block to the new block which will contain the Phi nodes for the selects.
18060 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18061 // Set the successors for MBB.
18062 MBB->addSuccessor(CvtMBB);
18063 MBB->addSuccessor(DoneMBB);
18064
18065 Register DstReg = MI.getOperand(0).getReg();
18066 Register SrcReg = MI.getOperand(1).getReg();
18067 Register MaxReg = MI.getOperand(2).getReg();
18068 int64_t FRM = MI.getOperand(3).getImm();
18069
18070 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18071 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
18072
18073 Register FabsReg = MRI.createVirtualRegister(RC);
18074 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18075
18076 // Compare the FP value to the max value.
18077 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18078 auto MIB =
18079 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18080 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18081 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18082
18083 // Insert branch.
18084 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18085 .addReg(CmpReg)
18086 .addReg(RISCV::X0)
18087 .addMBB(DoneMBB);
18088
18089 CvtMBB->addSuccessor(DoneMBB);
18090
18091 // Convert to integer.
18092 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18093 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18094 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18095 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18096
18097 // Convert back to FP.
18098 Register I2FReg = MRI.createVirtualRegister(RC);
18099 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18100 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
18101 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
18102
18103 // Restore the sign bit.
18104 Register CvtReg = MRI.createVirtualRegister(RC);
18105 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18106
18107 // Merge the results.
18108 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18109 .addReg(SrcReg)
18110 .addMBB(MBB)
18111 .addReg(CvtReg)
18112 .addMBB(CvtMBB);
18113
18114 MI.eraseFromParent();
18115 return DoneMBB;
18116}
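// Roughly, PseudoFROUND_S expands to the following (registers and an rtz
// rounding mode chosen for illustration only):
// ```
// fsgnjx.s ft0, fa0, fa0 # ft0 = |src|
// flt.s t0, ft0, fa1 # does |src| still have a fractional part?
// beq t0, x0, done # if not (or NaN), pass the input through
// fcvt.w.s t1, fa0, rtz # convert to integer with the requested mode
// fcvt.s.w ft1, t1, rtz # and back to FP
// fsgnj.s ft2, ft1, fa0 # restore the original sign
// done: # PHI of fa0 and ft2
// ```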
18117
18118 MachineBasicBlock *
18119 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18120 MachineBasicBlock *BB) const {
18121 switch (MI.getOpcode()) {
18122 default:
18123 llvm_unreachable("Unexpected instr type to insert");
18124 case RISCV::ReadCounterWide:
18125 assert(!Subtarget.is64Bit() &&
18126 "ReadCounterWide is only to be used on riscv32");
18127 return emitReadCounterWidePseudo(MI, BB);
18128 case RISCV::Select_GPR_Using_CC_GPR:
18129 case RISCV::Select_FPR16_Using_CC_GPR:
18130 case RISCV::Select_FPR16INX_Using_CC_GPR:
18131 case RISCV::Select_FPR32_Using_CC_GPR:
18132 case RISCV::Select_FPR32INX_Using_CC_GPR:
18133 case RISCV::Select_FPR64_Using_CC_GPR:
18134 case RISCV::Select_FPR64INX_Using_CC_GPR:
18135 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18136 return emitSelectPseudo(MI, BB, Subtarget);
18137 case RISCV::BuildPairF64Pseudo:
18138 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18139 case RISCV::SplitF64Pseudo:
18140 return emitSplitF64Pseudo(MI, BB, Subtarget);
18141 case RISCV::PseudoQuietFLE_H:
18142 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18143 case RISCV::PseudoQuietFLE_H_INX:
18144 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18145 case RISCV::PseudoQuietFLT_H:
18146 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18147 case RISCV::PseudoQuietFLT_H_INX:
18148 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18149 case RISCV::PseudoQuietFLE_S:
18150 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18151 case RISCV::PseudoQuietFLE_S_INX:
18152 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18153 case RISCV::PseudoQuietFLT_S:
18154 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18155 case RISCV::PseudoQuietFLT_S_INX:
18156 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18157 case RISCV::PseudoQuietFLE_D:
18158 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18159 case RISCV::PseudoQuietFLE_D_INX:
18160 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18161 case RISCV::PseudoQuietFLE_D_IN32X:
18162 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18163 Subtarget);
18164 case RISCV::PseudoQuietFLT_D:
18165 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18166 case RISCV::PseudoQuietFLT_D_INX:
18167 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18168 case RISCV::PseudoQuietFLT_D_IN32X:
18169 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18170 Subtarget);
18171
18172 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18173 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18174 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18175 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18176 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18177 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18178 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18179 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18180 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18181 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18182 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18183 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18184 case RISCV::PseudoFROUND_H:
18185 case RISCV::PseudoFROUND_H_INX:
18186 case RISCV::PseudoFROUND_S:
18187 case RISCV::PseudoFROUND_S_INX:
18188 case RISCV::PseudoFROUND_D:
18189 case RISCV::PseudoFROUND_D_INX:
18190 case RISCV::PseudoFROUND_D_IN32X:
18191 return emitFROUND(MI, BB, Subtarget);
18192 case TargetOpcode::STATEPOINT:
18193 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18194 // while jal call instruction (where statepoint will be lowered at the end)
18195 // has implicit def. This def is early-clobber as it will be set at
18196 // the moment of the call and earlier than any use is read.
18197 // Add this implicit dead def here as a workaround.
18198 MI.addOperand(*MI.getMF(),
18199 MachineOperand::CreateReg(
18200 RISCV::X1, /*isDef*/ true,
18201 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18202 /*isUndef*/ false, /*isEarlyClobber*/ true));
18203 [[fallthrough]];
18204 case TargetOpcode::STACKMAP:
18205 case TargetOpcode::PATCHPOINT:
18206 if (!Subtarget.is64Bit())
18207 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18208 "supported on 64-bit targets");
18209 return emitPatchPoint(MI, BB);
18210 }
18211}
18212
18213 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18214 SDNode *Node) const {
18215 // Add FRM dependency to any instructions with dynamic rounding mode.
18216 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18217 if (Idx < 0) {
18218 // Vector pseudos have FRM index indicated by TSFlags.
18219 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18220 if (Idx < 0)
18221 return;
18222 }
18223 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18224 return;
18225 // If the instruction already reads FRM, don't add another read.
18226 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18227 return;
18228 MI.addOperand(
18229 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18230}
18231
18232// Calling Convention Implementation.
18233// The expectations for frontend ABI lowering vary from target to target.
18234// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18235// details, but this is a longer term goal. For now, we simply try to keep the
18236// role of the frontend as simple and well-defined as possible. The rules can
18237// be summarised as:
18238// * Never split up large scalar arguments. We handle them here.
18239// * If a hardfloat calling convention is being used, and the struct may be
18240// passed in a pair of registers (fp+fp, int+fp), and both registers are
18241// available, then pass as two separate arguments. If either the GPRs or FPRs
18242// are exhausted, then pass according to the rule below.
18243// * If a struct could never be passed in registers or directly in a stack
18244// slot (as it is larger than 2*XLEN and the floating point rules don't
18245// apply), then pass it using a pointer with the byval attribute.
18246// * If a struct is less than 2*XLEN, then coerce to either a two-element
18247// word-sized array or a 2*XLEN scalar (depending on alignment).
18248// * The frontend can determine whether a struct is returned by reference or
18249// not based on its size and fields. If it will be returned by reference, the
18250// frontend must modify the prototype so a pointer with the sret annotation is
18251// passed as the first argument. This is not necessary for large scalar
18252// returns.
18253// * Struct return values and varargs should be coerced to structs containing
18254// register-size fields in the same situations they would be for fixed
18255// arguments.
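// For example, under these rules (an illustrative C-level sketch, assuming an
// ILP32D hard-float target):
// struct Small { int32_t a; int32_t b; }; // <= 2*XLEN: coerced to [2 x i32]
// // (or an i64, depending on alignment)
// struct Mixed { double d; int32_t i; }; // fp+int pair with registers free:
// // passed as fa0 + a0
// struct Big { int64_t x[4]; }; // > 2*XLEN, no FP rule applies:
// // passed via pointer with byval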
18256
18257static const MCPhysReg ArgFPR16s[] = {
18258 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18259 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18260};
18261static const MCPhysReg ArgFPR32s[] = {
18262 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18263 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18264};
18265static const MCPhysReg ArgFPR64s[] = {
18266 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18267 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18268};
18269// This is an interim calling convention and it may be changed in the future.
18270static const MCPhysReg ArgVRs[] = {
18271 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18272 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18273 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18274static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18275 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18276 RISCV::V20M2, RISCV::V22M2};
18277static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18278 RISCV::V20M4};
18279static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18280
18281 static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
18282 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18283 // the ILP32E ABI.
18284 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18285 RISCV::X13, RISCV::X14, RISCV::X15,
18286 RISCV::X16, RISCV::X17};
18287 // The GPRs used for passing arguments in the ILP32E/ILP64E ABI.
18288 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18289 RISCV::X13, RISCV::X14, RISCV::X15};
18290
18291 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18292 return ArrayRef(ArgEGPRs);
18293
18294 return ArrayRef(ArgIGPRs);
18295}
18296
18297 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18298 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
18299 // for save-restore libcall, so we don't use them.
18300 static const MCPhysReg FastCCIGPRs[] = {
18301 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18302 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18303 RISCV::X29, RISCV::X30, RISCV::X31};
18304
18305 // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.
18306 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18307 RISCV::X13, RISCV::X14, RISCV::X15,
18308 RISCV::X7};
18309
18310 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18311 return ArrayRef(FastCCEGPRs);
18312
18313 return ArrayRef(FastCCIGPRs);
18314}
18315
18316// Pass a 2*XLEN argument that has been split into two XLEN values through
18317// registers or the stack as necessary.
18318static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18319 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18320 MVT ValVT2, MVT LocVT2,
18321 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18322 unsigned XLenInBytes = XLen / 8;
18323 const RISCVSubtarget &STI =
18324 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18325 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(STI.getTargetABI());
18326
18327 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18328 // At least one half can be passed via register.
18329 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18330 VA1.getLocVT(), CCValAssign::Full));
18331 } else {
18332 // Both halves must be passed on the stack, with proper alignment.
18333 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18334 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18335 Align StackAlign(XLenInBytes);
18336 if (!EABI || XLen != 32)
18337 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18338 State.addLoc(
18339 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18340 State.AllocateStack(XLenInBytes, StackAlign),
18341 VA1.getLocVT(), CCValAssign::Full));
18342 State.addLoc(CCValAssign::getMem(
18343 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18344 LocVT2, CCValAssign::Full));
18345 return false;
18346 }
18347
18348 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18349 // The second half can also be passed via register.
18350 State.addLoc(
18351 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18352 } else {
18353 // The second half is passed via the stack, without additional alignment.
18354 State.addLoc(CCValAssign::getMem(
18355 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18356 LocVT2, CCValAssign::Full));
18357 }
18358
18359 return false;
18360}
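// For example, on RV32 an i64 argument is split into two i32 halves: with a6
// and a7 free both halves go in registers; if only a7 remains, the low half
// takes a7 and the high half spills to the stack; with no GPRs left, both
// halves land on the stack, the first aligned to the original type.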
18361
18362// Implements the RISC-V calling convention. Returns true upon failure.
18363bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18364 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18365 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18366 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18367 RVVArgDispatcher &RVVDispatcher) {
18368 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18369 assert(XLen == 32 || XLen == 64);
18370 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18371
18372 // Static chain parameter must not be passed in normal argument registers,
18373 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18374 if (ArgFlags.isNest()) {
18375 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18376 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18377 return false;
18378 }
18379 }
18380
18381 // Any return value split in to more than two values can't be returned
18382 // directly. Vectors are returned via the available vector registers.
18383 if (!LocVT.isVector() && IsRet && ValNo > 1)
18384 return true;
18385
18386 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18387 // variadic argument, or if no F16/F32 argument registers are available.
18388 bool UseGPRForF16_F32 = true;
18389 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18390 // variadic argument, or if no F64 argument registers are available.
18391 bool UseGPRForF64 = true;
18392
18393 switch (ABI) {
18394 default:
18395 llvm_unreachable("Unexpected ABI");
18396 case RISCVABI::ABI_ILP32:
18397 case RISCVABI::ABI_ILP32E:
18398 case RISCVABI::ABI_LP64:
18399 case RISCVABI::ABI_LP64E:
18400 break;
18401 case RISCVABI::ABI_ILP32F:
18402 case RISCVABI::ABI_LP64F:
18403 UseGPRForF16_F32 = !IsFixed;
18404 break;
18405 case RISCVABI::ABI_ILP32D:
18406 case RISCVABI::ABI_LP64D:
18407 UseGPRForF16_F32 = !IsFixed;
18408 UseGPRForF64 = !IsFixed;
18409 break;
18410 }
18411
18412 // FPR16, FPR32, and FPR64 alias each other.
18413 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18414 UseGPRForF16_F32 = true;
18415 UseGPRForF64 = true;
18416 }
18417
18418 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18419 // similar local variables rather than directly checking against the target
18420 // ABI.
18421
18422 if (UseGPRForF16_F32 &&
18423 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18424 LocVT = XLenVT;
18425 LocInfo = CCValAssign::BCvt;
18426 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18427 LocVT = MVT::i64;
18428 LocInfo = CCValAssign::BCvt;
18429 }
18430
18431 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(ABI);
18432
18433 // If this is a variadic argument, the RISC-V calling convention requires
18434 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18435 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18436 // be used regardless of whether the original argument was split during
18437 // legalisation or not. The argument will not be passed by registers if the
18438 // original type is larger than 2*XLEN, so the register alignment rule does
18439 // not apply.
18440 // TODO: To be compatible with GCC's behaviors, we don't align registers
18441 // currently if we are using ILP32E calling convention. This behavior may be
18442 // changed when RV32E/ILP32E is ratified.
18443 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18444 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18445 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18446 ABI != RISCVABI::ABI_ILP32E) {
18447 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18448 // Skip 'odd' register if necessary.
18449 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18450 State.AllocateReg(ArgGPRs);
18451 }
18452
18453 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18454 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18455 State.getPendingArgFlags();
18456
18457 assert(PendingLocs.size() == PendingArgFlags.size() &&
18458 "PendingLocs and PendingArgFlags out of sync");
18459
18460 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18461 // registers are exhausted.
18462 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18463 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18464 // Depending on available argument GPRS, f64 may be passed in a pair of
18465 // GPRs, split between a GPR and the stack, or passed completely on the
18466 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18467 // cases.
18468 Register Reg = State.AllocateReg(ArgGPRs);
18469 if (!Reg) {
18470 unsigned StackOffset = State.AllocateStack(8, Align(8));
18471 State.addLoc(
18472 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18473 return false;
18474 }
18475 LocVT = MVT::i32;
18476 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18477 Register HiReg = State.AllocateReg(ArgGPRs);
18478 if (HiReg) {
18479 State.addLoc(
18480 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18481 } else {
18482 unsigned StackOffset = State.AllocateStack(4, Align(4));
18483 State.addLoc(
18484 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18485 }
18486 return false;
18487 }
18488
18489 // Fixed-length vectors are located in the corresponding scalable-vector
18490 // container types.
18491 if (ValVT.isFixedLengthVector())
18492 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18493
18494 // Split arguments might be passed indirectly, so keep track of the pending
18495 // values. Split vectors are passed via a mix of registers and indirectly, so
18496 // treat them as we would any other argument.
18497 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18498 LocVT = XLenVT;
18499 LocInfo = CCValAssign::Indirect;
18500 PendingLocs.push_back(
18501 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18502 PendingArgFlags.push_back(ArgFlags);
18503 if (!ArgFlags.isSplitEnd()) {
18504 return false;
18505 }
18506 }
18507
18508 // If the split argument only had two elements, it should be passed directly
18509 // in registers or on the stack.
18510 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18511 PendingLocs.size() <= 2) {
18512 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18513 // Apply the normal calling convention rules to the first half of the
18514 // split argument.
18515 CCValAssign VA = PendingLocs[0];
18516 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18517 PendingLocs.clear();
18518 PendingArgFlags.clear();
18519 return CC_RISCVAssign2XLen(
18520 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18521 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18522 }
18523
18524 // Allocate to a register if possible, or else a stack slot.
18525 Register Reg;
18526 unsigned StoreSizeBytes = XLen / 8;
18527 Align StackAlign = Align(XLen / 8);
18528
18529 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18530 Reg = State.AllocateReg(ArgFPR16s);
18531 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18532 Reg = State.AllocateReg(ArgFPR32s);
18533 else if (ValVT == MVT::f64 && !UseGPRForF64)
18534 Reg = State.AllocateReg(ArgFPR64s);
18535 else if (ValVT.isVector()) {
18536 Reg = RVVDispatcher.getNextPhysReg();
18537 if (!Reg) {
18538 // For return values, the vector must be passed fully via registers or
18539 // via the stack.
18540 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18541 // but we're using all of them.
18542 if (IsRet)
18543 return true;
18544 // Try using a GPR to pass the address
18545 if ((Reg = State.AllocateReg(ArgGPRs))) {
18546 LocVT = XLenVT;
18547 LocInfo = CCValAssign::Indirect;
18548 } else if (ValVT.isScalableVector()) {
18549 LocVT = XLenVT;
18550 LocInfo = CCValAssign::Indirect;
18551 } else {
18552 // Pass fixed-length vectors on the stack.
18553 LocVT = ValVT;
18554 StoreSizeBytes = ValVT.getStoreSize();
18555 // Align vectors to their element sizes, being careful for vXi1
18556 // vectors.
18557 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18558 }
18559 }
18560 } else {
18561 Reg = State.AllocateReg(ArgGPRs);
18562 }
18563
18564 unsigned StackOffset =
18565 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18566
18567 // If we reach this point and PendingLocs is non-empty, we must be at the
18568 // end of a split argument that must be passed indirectly.
18569 if (!PendingLocs.empty()) {
18570 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18571 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18572
18573 for (auto &It : PendingLocs) {
18574 if (Reg)
18575 It.convertToReg(Reg);
18576 else
18577 It.convertToMem(StackOffset);
18578 State.addLoc(It);
18579 }
18580 PendingLocs.clear();
18581 PendingArgFlags.clear();
18582 return false;
18583 }
18584
18585 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18586 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18587 "Expected an XLenVT or vector types at this stage");
18588
18589 if (Reg) {
18590 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18591 return false;
18592 }
18593
18594 // When a scalar floating-point value is passed on the stack, no
18595 // bit-conversion is needed.
18596 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18597 assert(!ValVT.isVector());
18598 LocVT = ValVT;
18599 LocInfo = CCValAssign::Full;
18600 }
18601 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18602 return false;
18603}
18604
18605template <typename ArgTy>
18606static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18607 for (const auto &ArgIdx : enumerate(Args)) {
18608 MVT ArgVT = ArgIdx.value().VT;
18609 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18610 return ArgIdx.index();
18611 }
18612 return std::nullopt;
18613}
18614
18615void RISCVTargetLowering::analyzeInputArgs(
18616 MachineFunction &MF, CCState &CCInfo,
18617 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18618 RISCVCCAssignFn Fn) const {
18619 unsigned NumArgs = Ins.size();
18620 FunctionType *FType = MF.getFunction().getFunctionType();
18621
18622 RVVArgDispatcher Dispatcher;
18623 if (IsRet) {
18624 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18625 } else {
18626 SmallVector<Type *, 4> TypeList;
18627 for (const Argument &Arg : MF.getFunction().args())
18628 TypeList.push_back(Arg.getType());
18629 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18630 }
18631
18632 for (unsigned i = 0; i != NumArgs; ++i) {
18633 MVT ArgVT = Ins[i].VT;
18634 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18635
18636 Type *ArgTy = nullptr;
18637 if (IsRet)
18638 ArgTy = FType->getReturnType();
18639 else if (Ins[i].isOrigArg())
18640 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18641
18642 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18643 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18644 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18645 Dispatcher)) {
18646 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18647 << ArgVT << '\n');
18648 llvm_unreachable(nullptr);
18649 }
18650 }
18651}
18652
18653void RISCVTargetLowering::analyzeOutputArgs(
18654 MachineFunction &MF, CCState &CCInfo,
18655 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18656 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18657 unsigned NumArgs = Outs.size();
18658
18659 SmallVector<Type *, 4> TypeList;
18660 if (IsRet)
18661 TypeList.push_back(MF.getFunction().getReturnType());
18662 else if (CLI)
18663 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18664 TypeList.push_back(Arg.Ty);
18665 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18666
18667 for (unsigned i = 0; i != NumArgs; i++) {
18668 MVT ArgVT = Outs[i].VT;
18669 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18670 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18671
18672 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18673 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18674 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18675 Dispatcher)) {
18676 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18677 << ArgVT << "\n");
18678 llvm_unreachable(nullptr);
18679 }
18680 }
18681}
18682
18683// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18684// values.
18685static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18686 const CCValAssign &VA, const SDLoc &DL,
18687 const RISCVSubtarget &Subtarget) {
18688 switch (VA.getLocInfo()) {
18689 default:
18690 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18691 case CCValAssign::Full:
18692 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18693 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18694 break;
18695 case CCValAssign::BCvt:
18696 if (VA.getLocVT().isInteger() &&
18697 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18698 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18699 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18700 if (RV64LegalI32) {
18701 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18702 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18703 } else {
18704 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18705 }
18706 } else {
18707 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18708 }
18709 break;
18710 }
18711 return Val;
18712}
18713
18714// The caller is responsible for loading the full value if the argument is
18715// passed with CCValAssign::Indirect.
18716static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18717 const CCValAssign &VA, const SDLoc &DL,
18718 const ISD::InputArg &In,
18719 const RISCVTargetLowering &TLI) {
18720 MachineFunction &MF = DAG.getMachineFunction();
18721 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18722 EVT LocVT = VA.getLocVT();
18723 SDValue Val;
18724 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18725 Register VReg = RegInfo.createVirtualRegister(RC);
18726 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18727 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18728
18729 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18730 if (In.isOrigArg()) {
18731 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18732 if (OrigArg->getType()->isIntegerTy()) {
18733 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18734 // An input zero extended from fewer than 32 bits also has bit 31 clear, so it can be considered sign extended.
18735 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18736 (BitWidth < 32 && In.Flags.isZExt())) {
18737 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18738 RVFI->addSExt32Register(VReg);
18739 }
18740 }
18741 }
18742
18743 if (VA.getLocInfo() == CCValAssign::Indirect)
18744 return Val;
18745
18746 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18747}
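// For example, for 'int f(int x)' on RV64 the incoming x is already sign
// extended to 64 bits, so its virtual register is recorded via
// addSExt32Register above and the sext.w removal optimization mentioned in the
// comment can later delete redundant re-extensions. (Illustrative annotation.)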
18748
18749static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18750 const CCValAssign &VA, const SDLoc &DL,
18751 const RISCVSubtarget &Subtarget) {
18752 EVT LocVT = VA.getLocVT();
18753
18754 switch (VA.getLocInfo()) {
18755 default:
18756 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18757 case CCValAssign::Full:
18758 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18759 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18760 break;
18761 case CCValAssign::BCvt:
18762 if (LocVT.isInteger() &&
18763 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18764 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18765 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18766 if (RV64LegalI32) {
18767 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18768 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18769 } else {
18770 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18771 }
18772 } else {
18773 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18774 }
18775 break;
18776 }
18777 return Val;
18778}
18779
18780// The caller is responsible for loading the full value if the argument is
18781// passed with CCValAssign::Indirect.
18782static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18783 const CCValAssign &VA, const SDLoc &DL) {
18784 MachineFunction &MF = DAG.getMachineFunction();
18785 MachineFrameInfo &MFI = MF.getFrameInfo();
18786 EVT LocVT = VA.getLocVT();
18787 EVT ValVT = VA.getValVT();
18788 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
18789 if (ValVT.isScalableVector()) {
18790 // When the value is a scalable vector, what is actually passed on the stack
18791 // is a pointer to the scalable vector value, so ValVT here becomes the
18792 // pointer type instead of the scalable vector type.
18793 ValVT = LocVT;
18794 }
18795 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18796 /*IsImmutable=*/true);
18797 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18798 SDValue Val;
18799
18800 ISD::LoadExtType ExtType;
18801 switch (VA.getLocInfo()) {
18802 default:
18803 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18804 case CCValAssign::Full:
18805 case CCValAssign::Indirect:
18806 case CCValAssign::BCvt:
18807 ExtType = ISD::NON_EXTLOAD;
18808 break;
18809 }
18810 Val = DAG.getExtLoad(
18811 ExtType, DL, LocVT, Chain, FIN,
18813 return Val;
18814}
18815
18816static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18817 const CCValAssign &VA,
18818 const CCValAssign &HiVA,
18819 const SDLoc &DL) {
18820 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18821 "Unexpected VA");
18823 MachineFrameInfo &MFI = MF.getFrameInfo();
18825
18826 assert(VA.isRegLoc() && "Expected register VA assignment");
18827
18828 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18829 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18830 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18831 SDValue Hi;
18832 if (HiVA.isMemLoc()) {
18833 // Second half of f64 is passed on the stack.
18834 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18835 /*IsImmutable=*/true);
18836 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18837 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18839 } else {
18840 // Second half of f64 is passed in another GPR.
18841 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18842 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18843 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18844 }
18845 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18846}
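// Illustrative example: with an RV32 soft-float ABI, a 'double' argument is
// split into two 32-bit halves; the low half arrives in the GPR recorded by VA
// and the high half either in the next GPR or on the stack, and the two halves
// are glued back together with RISCVISD::BuildPairF64.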
18847
18848// FastCC gives less than a 1% performance improvement on some particular
18849// benchmarks, but theoretically it may have benefits in some cases.
18850bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18851 unsigned ValNo, MVT ValVT, MVT LocVT,
18852 CCValAssign::LocInfo LocInfo,
18853 ISD::ArgFlagsTy ArgFlags, CCState &State,
18854 bool IsFixed, bool IsRet, Type *OrigTy,
18855 const RISCVTargetLowering &TLI,
18856 RVVArgDispatcher &RVVDispatcher) {
18857 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18858 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18859 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18860 return false;
18861 }
18862 }
18863
18864 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18865
18866 if (LocVT == MVT::f16 &&
18867 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18868 static const MCPhysReg FPR16List[] = {
18869 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18870 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18871 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18872 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18873 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18874 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18875 return false;
18876 }
18877 }
18878
18879 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18880 static const MCPhysReg FPR32List[] = {
18881 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18882 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18883 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18884 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18885 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18886 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18887 return false;
18888 }
18889 }
18890
18891 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18892 static const MCPhysReg FPR64List[] = {
18893 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18894 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18895 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18896 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18897 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18898 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18899 return false;
18900 }
18901 }
18902
18903 // Check if there is an available GPR before hitting the stack.
18904 if ((LocVT == MVT::f16 &&
18905 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18906 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18907 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18908 Subtarget.hasStdExtZdinx())) {
18909 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18910 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18911 return false;
18912 }
18913 }
18914
18915 if (LocVT == MVT::f16) {
18916 unsigned Offset2 = State.AllocateStack(2, Align(2));
18917 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18918 return false;
18919 }
18920
18921 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18922 unsigned Offset4 = State.AllocateStack(4, Align(4));
18923 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18924 return false;
18925 }
18926
18927 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18928 unsigned Offset5 = State.AllocateStack(8, Align(8));
18929 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18930 return false;
18931 }
18932
18933 if (LocVT.isVector()) {
18934 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18935 if (AllocatedVReg) {
18936 // Fixed-length vectors are located in the corresponding scalable-vector
18937 // container types.
18938 if (ValVT.isFixedLengthVector())
18939 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18940 State.addLoc(
18941 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
18942 } else {
18943 // Try and pass the address via a "fast" GPR.
18944 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18945 LocInfo = CCValAssign::Indirect;
18946 LocVT = TLI.getSubtarget().getXLenVT();
18947 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18948 } else if (ValVT.isFixedLengthVector()) {
18949 auto StackAlign =
18951 unsigned StackOffset =
18952 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18953 State.addLoc(
18954 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18955 } else {
18956 // Can't pass scalable vectors on the stack.
18957 return true;
18958 }
18959 }
18960
18961 return false;
18962 }
18963
18964 return true; // CC didn't match.
18965}
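// Note that, compared with the standard calling convention, the fastcc
// register lists above additionally use the temporary FPRs ft0-ft7 and
// ft8-ft11 alongside the argument registers fa0-fa7, and fall back to the
// "fast" GPR set or the stack, so more values stay in registers across a call.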
18966
18967bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18968 CCValAssign::LocInfo LocInfo,
18969 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18970 if (ArgFlags.isNest()) {
18971 report_fatal_error(
18972 "Attribute 'nest' is not supported in GHC calling convention");
18973 }
18974
18975 static const MCPhysReg GPRList[] = {
18976 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18977 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18978
18979 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18980 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18981 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18982 if (unsigned Reg = State.AllocateReg(GPRList)) {
18983 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18984 return false;
18985 }
18986 }
18987
18988 const RISCVSubtarget &Subtarget =
18990
18991 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18992 // Pass in STG registers: F1, ..., F6
18993 // fs0 ... fs5
18994 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18995 RISCV::F18_F, RISCV::F19_F,
18996 RISCV::F20_F, RISCV::F21_F};
18997 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18998 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18999 return false;
19000 }
19001 }
19002
19003 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19004 // Pass in STG registers: D1, ..., D6
19005 // fs6 ... fs11
19006 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19007 RISCV::F24_D, RISCV::F25_D,
19008 RISCV::F26_D, RISCV::F27_D};
19009 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19010 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19011 return false;
19012 }
19013 }
19014
19015 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19016 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19017 Subtarget.is64Bit())) {
19018 if (unsigned Reg = State.AllocateReg(GPRList)) {
19019 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19020 return false;
19021 }
19022 }
19023
19024 report_fatal_error("No registers left in GHC calling convention");
19025 return true;
19026}
19027
19028// Transform physical registers into virtual registers.
19029SDValue RISCVTargetLowering::LowerFormalArguments(
19030 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19031 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19032 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19033
19034 MachineFunction &MF = DAG.getMachineFunction();
19035
19036 switch (CallConv) {
19037 default:
19038 report_fatal_error("Unsupported calling convention");
19039 case CallingConv::C:
19040 case CallingConv::Fast:
19042 case CallingConv::GRAAL:
19044 break;
19045 case CallingConv::GHC:
19046 if (Subtarget.hasStdExtE())
19047 report_fatal_error("GHC calling convention is not supported on RVE!");
19048 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19049 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19050 "(Zdinx/D) instruction set extensions");
19051 }
19052
19053 const Function &Func = MF.getFunction();
19054 if (Func.hasFnAttribute("interrupt")) {
19055 if (!Func.arg_empty())
19056 report_fatal_error(
19057 "Functions with the interrupt attribute cannot have arguments!");
19058
19059 StringRef Kind =
19060 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19061
19062 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19063 report_fatal_error(
19064 "Function interrupt attribute argument not supported!");
19065 }
19066
19067 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19068 MVT XLenVT = Subtarget.getXLenVT();
19069 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19070 // Used with varargs to accumulate store chains.
19071 std::vector<SDValue> OutChains;
19072
19073 // Assign locations to all of the incoming arguments.
19075 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19076
19077 if (CallConv == CallingConv::GHC)
19079 else
19080 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19082 : RISCV::CC_RISCV);
19083
19084 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19085 CCValAssign &VA = ArgLocs[i];
19086 SDValue ArgValue;
19087 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19088 // case.
19089 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19090 assert(VA.needsCustom());
19091 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19092 } else if (VA.isRegLoc())
19093 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19094 else
19095 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19096
19097 if (VA.getLocInfo() == CCValAssign::Indirect) {
19098 // If the original argument was split and passed by reference (e.g. i128
19099 // on RV32), we need to load all parts of it here (using the same
19100 // address). Vectors may be partly split to registers and partly to the
19101 // stack, in which case the base address is partly offset and subsequent
19102 // stores are relative to that.
19103 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19105 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19106 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19107 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19108 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19109 CCValAssign &PartVA = ArgLocs[i + 1];
19110 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19111 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19112 if (PartVA.getValVT().isScalableVector())
19113 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19114 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19115 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19117 ++i;
19118 ++InsIdx;
19119 }
19120 continue;
19121 }
19122 InVals.push_back(ArgValue);
19123 }
19124
19125 if (any_of(ArgLocs,
19126 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19127 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19128
19129 if (IsVarArg) {
19130 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19131 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19132 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19133 MachineFrameInfo &MFI = MF.getFrameInfo();
19134 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19136
19137 // Size of the vararg save area. For now, the varargs save area is either
19138 // zero or large enough to hold a0-a7.
19139 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19140 int FI;
19141
19142 // If all registers are allocated, then all varargs must be passed on the
19143 // stack and we don't need to save any argregs.
19144 if (VarArgsSaveSize == 0) {
19145 int VaArgOffset = CCInfo.getStackSize();
19146 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19147 } else {
19148 int VaArgOffset = -VarArgsSaveSize;
19149 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19150
19151 // If saving an odd number of registers, create an extra stack slot to
19152 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19153 // offsets to even-numbered registers remain 2*XLEN-aligned.
19154 if (Idx % 2) {
19156 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19157 VarArgsSaveSize += XLenInBytes;
19158 }
19159
19160 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19161
19162 // Copy the integer registers that may have been used for passing varargs
19163 // to the vararg save area.
19164 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19165 const Register Reg = RegInfo.createVirtualRegister(RC);
19166 RegInfo.addLiveIn(ArgRegs[I], Reg);
19167 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19168 SDValue Store = DAG.getStore(
19169 Chain, DL, ArgValue, FIN,
19170 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19171 OutChains.push_back(Store);
19172 FIN =
19173 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19174 }
19175 }
19176
19177 // Record the frame index of the first variable argument,
19178 // which is needed by the VASTART lowering.
19179 RVFI->setVarArgsFrameIndex(FI);
19180 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19181 }
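// Worked example: for a variadic callee such as 'int sum(int n, ...)' on RV32,
// a0 holds n, so Idx == 1 and a1-a7 are spilled into the save area, giving
// VarArgsSaveSize = 7 * 4 = 28 bytes; since an odd number of registers is
// saved, the extra slot above rounds this up to 32 so the save area stays
// 2*XLEN aligned. (Illustrative annotation.)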
19182
19183 // All stores are grouped in one node to allow the matching between
19184 // the size of Ins and InVals. This only happens for vararg functions.
19185 if (!OutChains.empty()) {
19186 OutChains.push_back(Chain);
19187 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19188 }
19189
19190 return Chain;
19191}
19192
19193/// isEligibleForTailCallOptimization - Check whether the call is eligible
19194/// for tail call optimization.
19195/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19196bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19197 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19198 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19199
19200 auto CalleeCC = CLI.CallConv;
19201 auto &Outs = CLI.Outs;
19202 auto &Caller = MF.getFunction();
19203 auto CallerCC = Caller.getCallingConv();
19204
19205 // Exception-handling functions need a special set of instructions to
19206 // indicate a return to the hardware. Tail-calling another function would
19207 // probably break this.
19208 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19209 // should be expanded as new function attributes are introduced.
19210 if (Caller.hasFnAttribute("interrupt"))
19211 return false;
19212
19213 // Do not tail call opt if the stack is used to pass parameters.
19214 if (CCInfo.getStackSize() != 0)
19215 return false;
19216
19217 // Do not tail call opt if any parameters need to be passed indirectly.
19218 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19219 // passed indirectly. So the address of the value will be passed in a
19220 // register, or if not available, then the address is put on the stack. In
19221 // order to pass indirectly, space on the stack often needs to be allocated
19222 // in order to store the value. In this case the CCInfo.getStackSize() != 0
19223 // check is not enough and we need to check if any CCValAssign in ArgLocs
19224 // is passed CCValAssign::Indirect.
19225 for (auto &VA : ArgLocs)
19226 if (VA.getLocInfo() == CCValAssign::Indirect)
19227 return false;
19228
19229 // Do not tail call opt if either caller or callee uses struct return
19230 // semantics.
19231 auto IsCallerStructRet = Caller.hasStructRetAttr();
19232 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19233 if (IsCallerStructRet || IsCalleeStructRet)
19234 return false;
19235
19236 // The callee has to preserve all registers the caller needs to preserve.
19237 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19238 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19239 if (CalleeCC != CallerCC) {
19240 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19241 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19242 return false;
19243 }
19244
19245 // Byval parameters hand the function a pointer directly into the stack area
19246 // we want to reuse during a tail call. Working around this *is* possible
19247 // but less efficient and uglier in LowerCall.
19248 for (auto &Arg : Outs)
19249 if (Arg.Flags.isByVal())
19250 return false;
19251
19252 return true;
19253}
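// For example, a call whose arguments all fit in registers, with no byval or
// sret handling and a callee that preserves at least the caller's preserved
// registers, can be emitted as a tail call (RISCVISD::TAIL below); any call
// that needs stack space for its arguments is lowered as a normal call.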
19254
19255static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19256 return DAG.getDataLayout().getPrefTypeAlign(
19257 VT.getTypeForEVT(*DAG.getContext()));
19258}
19259
19260// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19261// and output parameter nodes.
19262SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19263 SmallVectorImpl<SDValue> &InVals) const {
19264 SelectionDAG &DAG = CLI.DAG;
19265 SDLoc &DL = CLI.DL;
19267 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19269 SDValue Chain = CLI.Chain;
19270 SDValue Callee = CLI.Callee;
19271 bool &IsTailCall = CLI.IsTailCall;
19272 CallingConv::ID CallConv = CLI.CallConv;
19273 bool IsVarArg = CLI.IsVarArg;
19274 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19275 MVT XLenVT = Subtarget.getXLenVT();
19276
19278
19279 // Analyze the operands of the call, assigning locations to each operand.
19281 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19282
19283 if (CallConv == CallingConv::GHC) {
19284 if (Subtarget.hasStdExtE())
19285 report_fatal_error("GHC calling convention is not supported on RVE!");
19287 } else
19288 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19290 : RISCV::CC_RISCV);
19291
19292 // Check if it's really possible to do a tail call.
19293 if (IsTailCall)
19294 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19295
19296 if (IsTailCall)
19297 ++NumTailCalls;
19298 else if (CLI.CB && CLI.CB->isMustTailCall())
19299 report_fatal_error("failed to perform tail call elimination on a call "
19300 "site marked musttail");
19301
19302 // Get a count of how many bytes are to be pushed on the stack.
19303 unsigned NumBytes = ArgCCInfo.getStackSize();
19304
19305 // Create local copies for byval args
19306 SmallVector<SDValue, 8> ByValArgs;
19307 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19308 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19309 if (!Flags.isByVal())
19310 continue;
19311
19312 SDValue Arg = OutVals[i];
19313 unsigned Size = Flags.getByValSize();
19314 Align Alignment = Flags.getNonZeroByValAlign();
19315
19316 int FI =
19317 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19318 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19319 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19320
19321 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19322 /*IsVolatile=*/false,
19323 /*AlwaysInline=*/false, IsTailCall,
19325 ByValArgs.push_back(FIPtr);
19326 }
19327
19328 if (!IsTailCall)
19329 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19330
19331 // Copy argument values to their designated locations.
19333 SmallVector<SDValue, 8> MemOpChains;
19334 SDValue StackPtr;
19335 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19336 ++i, ++OutIdx) {
19337 CCValAssign &VA = ArgLocs[i];
19338 SDValue ArgValue = OutVals[OutIdx];
19339 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19340
19341 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19342 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19343 assert(VA.isRegLoc() && "Expected register VA assignment");
19344 assert(VA.needsCustom());
19345 SDValue SplitF64 = DAG.getNode(
19346 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19347 SDValue Lo = SplitF64.getValue(0);
19348 SDValue Hi = SplitF64.getValue(1);
19349
19350 Register RegLo = VA.getLocReg();
19351 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19352
19353 // Get the CCValAssign for the Hi part.
19354 CCValAssign &HiVA = ArgLocs[++i];
19355
19356 if (HiVA.isMemLoc()) {
19357 // Second half of f64 is passed on the stack.
19358 if (!StackPtr.getNode())
19359 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19361 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19362 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19363 // Emit the store.
19364 MemOpChains.push_back(
19365 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19366 } else {
19367 // Second half of f64 is passed in another GPR.
19368 Register RegHigh = HiVA.getLocReg();
19369 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19370 }
19371 continue;
19372 }
19373
19374 // Promote the value if needed.
19375 // For now, only handle fully promoted and indirect arguments.
19376 if (VA.getLocInfo() == CCValAssign::Indirect) {
19377 // Store the argument in a stack slot and pass its address.
19378 Align StackAlign =
19379 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19380 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19381 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19382 // If the original argument was split (e.g. i128), we need
19383 // to store the required parts of it here (and pass just one address).
19384 // Vectors may be partly split to registers and partly to the stack, in
19385 // which case the base address is partly offset and subsequent stores are
19386 // relative to that.
19387 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19388 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19389 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19390 // Calculate the total size to store. We only discover what is being stored
19391 // by walking the remaining parts in the loop below and collecting their
19392 // sizes and alignments.
19394 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19395 SDValue PartValue = OutVals[OutIdx + 1];
19396 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19397 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19398 EVT PartVT = PartValue.getValueType();
19399 if (PartVT.isScalableVector())
19400 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19401 StoredSize += PartVT.getStoreSize();
19402 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19403 Parts.push_back(std::make_pair(PartValue, Offset));
19404 ++i;
19405 ++OutIdx;
19406 }
19407 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19408 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19409 MemOpChains.push_back(
19410 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19412 for (const auto &Part : Parts) {
19413 SDValue PartValue = Part.first;
19414 SDValue PartOffset = Part.second;
19416 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19417 MemOpChains.push_back(
19418 DAG.getStore(Chain, DL, PartValue, Address,
19420 }
19421 ArgValue = SpillSlot;
19422 } else {
19423 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19424 }
19425
19426 // Use local copy if it is a byval arg.
19427 if (Flags.isByVal())
19428 ArgValue = ByValArgs[j++];
19429
19430 if (VA.isRegLoc()) {
19431 // Queue up the argument copies and emit them at the end.
19432 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19433 } else {
19434 assert(VA.isMemLoc() && "Argument not register or memory");
19435 assert(!IsTailCall && "Tail call not allowed if stack is used "
19436 "for passing parameters");
19437
19438 // Work out the address of the stack slot.
19439 if (!StackPtr.getNode())
19440 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19442 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19444
19445 // Emit the store.
19446 MemOpChains.push_back(
19447 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19448 }
19449 }
19450
19451 // Join the stores, which are independent of one another.
19452 if (!MemOpChains.empty())
19453 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19454
19455 SDValue Glue;
19456
19457 // Build a sequence of copy-to-reg nodes, chained and glued together.
19458 for (auto &Reg : RegsToPass) {
19459 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19460 Glue = Chain.getValue(1);
19461 }
19462
19463 // Validate that none of the argument registers have been marked as
19464 // reserved; if so, report an error. Do the same for the return address if
19465 // this is not a tail call.
19466 validateCCReservedRegs(RegsToPass, MF);
19467 if (!IsTailCall &&
19470 MF.getFunction(),
19471 "Return address register required, but has been reserved."});
19472
19473 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19474 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19475 // split it and then direct call can be matched by PseudoCALL.
19476 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19477 const GlobalValue *GV = S->getGlobal();
19478 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19479 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19480 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19481 }
19482
19483 // The first call operand is the chain and the second is the target address.
19485 Ops.push_back(Chain);
19486 Ops.push_back(Callee);
19487
19488 // Add argument registers to the end of the list so that they are
19489 // known live into the call.
19490 for (auto &Reg : RegsToPass)
19491 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19492
19493 if (!IsTailCall) {
19494 // Add a register mask operand representing the call-preserved registers.
19495 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19496 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19497 assert(Mask && "Missing call preserved mask for calling convention");
19498 Ops.push_back(DAG.getRegisterMask(Mask));
19499 }
19500
19501 // Glue the call to the argument copies, if any.
19502 if (Glue.getNode())
19503 Ops.push_back(Glue);
19504
19505 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19506 "Unexpected CFI type for a direct call");
19507
19508 // Emit the call.
19509 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19510
19511 if (IsTailCall) {
19513 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19514 if (CLI.CFIType)
19515 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19516 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19517 return Ret;
19518 }
19519
19520 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19521 if (CLI.CFIType)
19522 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19523 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19524 Glue = Chain.getValue(1);
19525
19526 // Mark the end of the call, which is glued to the call itself.
19527 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19528 Glue = Chain.getValue(1);
19529
19530 // Assign locations to each value returned by this call.
19532 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19533 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19534
19535 // Copy all of the result registers out of their specified physreg.
19536 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19537 auto &VA = RVLocs[i];
19538 // Copy the value out
19539 SDValue RetValue =
19540 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19541 // Glue the RetValue to the end of the call sequence
19542 Chain = RetValue.getValue(1);
19543 Glue = RetValue.getValue(2);
19544
19545 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19546 assert(VA.needsCustom());
19547 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19548 MVT::i32, Glue);
19549 Chain = RetValue2.getValue(1);
19550 Glue = RetValue2.getValue(2);
19551 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19552 RetValue2);
19553 }
19554
19555 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19556
19557 InVals.push_back(RetValue);
19558 }
19559
19560 return Chain;
19561}
19562
19563bool RISCVTargetLowering::CanLowerReturn(
19564 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19565 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19567 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19568
19569 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19570
19571 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19572 MVT VT = Outs[i].VT;
19573 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19574 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19575 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19576 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19577 nullptr, *this, Dispatcher))
19578 return false;
19579 }
19580 return true;
19581}
19582
19583SDValue
19584RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19585 bool IsVarArg,
19586 const SmallVectorImpl<ISD::OutputArg> &Outs,
19587 const SmallVectorImpl<SDValue> &OutVals,
19588 const SDLoc &DL, SelectionDAG &DAG) const {
19590 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19591
19592 // Stores the assignment of the return value to a location.
19594
19595 // Info about the registers and stack slot.
19596 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19597 *DAG.getContext());
19598
19599 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19600 nullptr, RISCV::CC_RISCV);
19601
19602 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19603 report_fatal_error("GHC functions return void only");
19604
19605 SDValue Glue;
19606 SmallVector<SDValue, 4> RetOps(1, Chain);
19607
19608 // Copy the result values into the output registers.
19609 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19610 SDValue Val = OutVals[OutIdx];
19611 CCValAssign &VA = RVLocs[i];
19612 assert(VA.isRegLoc() && "Can only return in registers!");
19613
19614 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19615 // Handle returning f64 on RV32D with a soft float ABI.
19616 assert(VA.isRegLoc() && "Expected return via registers");
19617 assert(VA.needsCustom());
19618 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19619 DAG.getVTList(MVT::i32, MVT::i32), Val);
19620 SDValue Lo = SplitF64.getValue(0);
19621 SDValue Hi = SplitF64.getValue(1);
19622 Register RegLo = VA.getLocReg();
19623 Register RegHi = RVLocs[++i].getLocReg();
19624
19625 if (STI.isRegisterReservedByUser(RegLo) ||
19626 STI.isRegisterReservedByUser(RegHi))
19628 MF.getFunction(),
19629 "Return value register required, but has been reserved."});
19630
19631 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19632 Glue = Chain.getValue(1);
19633 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19634 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19635 Glue = Chain.getValue(1);
19636 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19637 } else {
19638 // Handle a 'normal' return.
19639 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19640 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19641
19642 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19644 MF.getFunction(),
19645 "Return value register required, but has been reserved."});
19646
19647 // Guarantee that all emitted copies are stuck together.
19648 Glue = Chain.getValue(1);
19649 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19650 }
19651 }
19652
19653 RetOps[0] = Chain; // Update chain.
19654
19655 // Add the glue node if we have it.
19656 if (Glue.getNode()) {
19657 RetOps.push_back(Glue);
19658 }
19659
19660 if (any_of(RVLocs,
19661 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19662 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19663
19664 unsigned RetOpc = RISCVISD::RET_GLUE;
19665 // Interrupt service routines use different return instructions.
19666 const Function &Func = DAG.getMachineFunction().getFunction();
19667 if (Func.hasFnAttribute("interrupt")) {
19668 if (!Func.getReturnType()->isVoidTy())
19669 report_fatal_error(
19670 "Functions with the interrupt attribute must have void return type!");
19671
19673 StringRef Kind =
19674 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19675
19676 if (Kind == "supervisor")
19677 RetOpc = RISCVISD::SRET_GLUE;
19678 else
19679 RetOpc = RISCVISD::MRET_GLUE;
19680 }
19681
19682 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19683}
19684
19685void RISCVTargetLowering::validateCCReservedRegs(
19686 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19687 MachineFunction &MF) const {
19688 const Function &F = MF.getFunction();
19689 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19690
19691 if (llvm::any_of(Regs, [&STI](auto Reg) {
19692 return STI.isRegisterReservedByUser(Reg.first);
19693 }))
19694 F.getContext().diagnose(DiagnosticInfoUnsupported{
19695 F, "Argument register required, but has been reserved."});
19696}
19697
19698// Check if the result of the node is only used as a return value, as
19699// otherwise we can't perform a tail-call.
19700bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19701 if (N->getNumValues() != 1)
19702 return false;
19703 if (!N->hasNUsesOfValue(1, 0))
19704 return false;
19705
19706 SDNode *Copy = *N->use_begin();
19707
19708 if (Copy->getOpcode() == ISD::BITCAST) {
19709 return isUsedByReturnOnly(Copy, Chain);
19710 }
19711
19712 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19713 // with soft float ABIs.
19714 if (Copy->getOpcode() != ISD::CopyToReg) {
19715 return false;
19716 }
19717
19718 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19719 // isn't safe to perform a tail call.
19720 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19721 return false;
19722
19723 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19724 bool HasRet = false;
19725 for (SDNode *Node : Copy->uses()) {
19726 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19727 return false;
19728 HasRet = true;
19729 }
19730 if (!HasRet)
19731 return false;
19732
19733 Chain = Copy->getOperand(0);
19734 return true;
19735}
19736
19737bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19738 return CI->isTailCall();
19739}
19740
19741const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19742#define NODE_NAME_CASE(NODE) \
19743 case RISCVISD::NODE: \
19744 return "RISCVISD::" #NODE;
19745 // clang-format off
19746 switch ((RISCVISD::NodeType)Opcode) {
19747 case RISCVISD::FIRST_NUMBER:
19748 break;
19749 NODE_NAME_CASE(RET_GLUE)
19750 NODE_NAME_CASE(SRET_GLUE)
19751 NODE_NAME_CASE(MRET_GLUE)
19752 NODE_NAME_CASE(CALL)
19753 NODE_NAME_CASE(SELECT_CC)
19754 NODE_NAME_CASE(BR_CC)
19755 NODE_NAME_CASE(BuildPairF64)
19756 NODE_NAME_CASE(SplitF64)
19757 NODE_NAME_CASE(TAIL)
19758 NODE_NAME_CASE(ADD_LO)
19759 NODE_NAME_CASE(HI)
19760 NODE_NAME_CASE(LLA)
19761 NODE_NAME_CASE(ADD_TPREL)
19762 NODE_NAME_CASE(MULHSU)
19763 NODE_NAME_CASE(SHL_ADD)
19764 NODE_NAME_CASE(SLLW)
19765 NODE_NAME_CASE(SRAW)
19766 NODE_NAME_CASE(SRLW)
19767 NODE_NAME_CASE(DIVW)
19768 NODE_NAME_CASE(DIVUW)
19769 NODE_NAME_CASE(REMUW)
19770 NODE_NAME_CASE(ROLW)
19771 NODE_NAME_CASE(RORW)
19772 NODE_NAME_CASE(CLZW)
19773 NODE_NAME_CASE(CTZW)
19774 NODE_NAME_CASE(ABSW)
19775 NODE_NAME_CASE(FMV_H_X)
19776 NODE_NAME_CASE(FMV_X_ANYEXTH)
19777 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19778 NODE_NAME_CASE(FMV_W_X_RV64)
19779 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19780 NODE_NAME_CASE(FCVT_X)
19781 NODE_NAME_CASE(FCVT_XU)
19782 NODE_NAME_CASE(FCVT_W_RV64)
19783 NODE_NAME_CASE(FCVT_WU_RV64)
19784 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19785 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19786 NODE_NAME_CASE(FP_ROUND_BF16)
19787 NODE_NAME_CASE(FP_EXTEND_BF16)
19788 NODE_NAME_CASE(FROUND)
19789 NODE_NAME_CASE(FCLASS)
19790 NODE_NAME_CASE(FMAX)
19791 NODE_NAME_CASE(FMIN)
19792 NODE_NAME_CASE(READ_COUNTER_WIDE)
19793 NODE_NAME_CASE(BREV8)
19794 NODE_NAME_CASE(ORC_B)
19795 NODE_NAME_CASE(ZIP)
19796 NODE_NAME_CASE(UNZIP)
19797 NODE_NAME_CASE(CLMUL)
19798 NODE_NAME_CASE(CLMULH)
19799 NODE_NAME_CASE(CLMULR)
19800 NODE_NAME_CASE(MOPR)
19801 NODE_NAME_CASE(MOPRR)
19802 NODE_NAME_CASE(SHA256SIG0)
19803 NODE_NAME_CASE(SHA256SIG1)
19804 NODE_NAME_CASE(SHA256SUM0)
19805 NODE_NAME_CASE(SHA256SUM1)
19806 NODE_NAME_CASE(SM4KS)
19807 NODE_NAME_CASE(SM4ED)
19808 NODE_NAME_CASE(SM3P0)
19809 NODE_NAME_CASE(SM3P1)
19810 NODE_NAME_CASE(TH_LWD)
19811 NODE_NAME_CASE(TH_LWUD)
19812 NODE_NAME_CASE(TH_LDD)
19813 NODE_NAME_CASE(TH_SWD)
19814 NODE_NAME_CASE(TH_SDD)
19815 NODE_NAME_CASE(VMV_V_V_VL)
19816 NODE_NAME_CASE(VMV_V_X_VL)
19817 NODE_NAME_CASE(VFMV_V_F_VL)
19818 NODE_NAME_CASE(VMV_X_S)
19819 NODE_NAME_CASE(VMV_S_X_VL)
19820 NODE_NAME_CASE(VFMV_S_F_VL)
19821 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19822 NODE_NAME_CASE(READ_VLENB)
19823 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19824 NODE_NAME_CASE(VSLIDEUP_VL)
19825 NODE_NAME_CASE(VSLIDE1UP_VL)
19826 NODE_NAME_CASE(VSLIDEDOWN_VL)
19827 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19828 NODE_NAME_CASE(VFSLIDE1UP_VL)
19829 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19830 NODE_NAME_CASE(VID_VL)
19831 NODE_NAME_CASE(VFNCVT_ROD_VL)
19832 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19833 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19834 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19835 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19836 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19837 NODE_NAME_CASE(VECREDUCE_AND_VL)
19838 NODE_NAME_CASE(VECREDUCE_OR_VL)
19839 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19840 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19841 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19842 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19843 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19844 NODE_NAME_CASE(ADD_VL)
19845 NODE_NAME_CASE(AND_VL)
19846 NODE_NAME_CASE(MUL_VL)
19847 NODE_NAME_CASE(OR_VL)
19848 NODE_NAME_CASE(SDIV_VL)
19849 NODE_NAME_CASE(SHL_VL)
19850 NODE_NAME_CASE(SREM_VL)
19851 NODE_NAME_CASE(SRA_VL)
19852 NODE_NAME_CASE(SRL_VL)
19853 NODE_NAME_CASE(ROTL_VL)
19854 NODE_NAME_CASE(ROTR_VL)
19855 NODE_NAME_CASE(SUB_VL)
19856 NODE_NAME_CASE(UDIV_VL)
19857 NODE_NAME_CASE(UREM_VL)
19858 NODE_NAME_CASE(XOR_VL)
19859 NODE_NAME_CASE(AVGFLOORU_VL)
19860 NODE_NAME_CASE(AVGCEILU_VL)
19861 NODE_NAME_CASE(SADDSAT_VL)
19862 NODE_NAME_CASE(UADDSAT_VL)
19863 NODE_NAME_CASE(SSUBSAT_VL)
19864 NODE_NAME_CASE(USUBSAT_VL)
19865 NODE_NAME_CASE(FADD_VL)
19866 NODE_NAME_CASE(FSUB_VL)
19867 NODE_NAME_CASE(FMUL_VL)
19868 NODE_NAME_CASE(FDIV_VL)
19869 NODE_NAME_CASE(FNEG_VL)
19870 NODE_NAME_CASE(FABS_VL)
19871 NODE_NAME_CASE(FSQRT_VL)
19872 NODE_NAME_CASE(FCLASS_VL)
19873 NODE_NAME_CASE(VFMADD_VL)
19874 NODE_NAME_CASE(VFNMADD_VL)
19875 NODE_NAME_CASE(VFMSUB_VL)
19876 NODE_NAME_CASE(VFNMSUB_VL)
19877 NODE_NAME_CASE(VFWMADD_VL)
19878 NODE_NAME_CASE(VFWNMADD_VL)
19879 NODE_NAME_CASE(VFWMSUB_VL)
19880 NODE_NAME_CASE(VFWNMSUB_VL)
19881 NODE_NAME_CASE(FCOPYSIGN_VL)
19882 NODE_NAME_CASE(SMIN_VL)
19883 NODE_NAME_CASE(SMAX_VL)
19884 NODE_NAME_CASE(UMIN_VL)
19885 NODE_NAME_CASE(UMAX_VL)
19886 NODE_NAME_CASE(BITREVERSE_VL)
19887 NODE_NAME_CASE(BSWAP_VL)
19888 NODE_NAME_CASE(CTLZ_VL)
19889 NODE_NAME_CASE(CTTZ_VL)
19890 NODE_NAME_CASE(CTPOP_VL)
19891 NODE_NAME_CASE(VFMIN_VL)
19892 NODE_NAME_CASE(VFMAX_VL)
19893 NODE_NAME_CASE(MULHS_VL)
19894 NODE_NAME_CASE(MULHU_VL)
19895 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19896 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19897 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19898 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19899 NODE_NAME_CASE(VFCVT_X_F_VL)
19900 NODE_NAME_CASE(VFCVT_XU_F_VL)
19901 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19902 NODE_NAME_CASE(SINT_TO_FP_VL)
19903 NODE_NAME_CASE(UINT_TO_FP_VL)
19904 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19905 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19906 NODE_NAME_CASE(FP_EXTEND_VL)
19907 NODE_NAME_CASE(FP_ROUND_VL)
19908 NODE_NAME_CASE(STRICT_FADD_VL)
19909 NODE_NAME_CASE(STRICT_FSUB_VL)
19910 NODE_NAME_CASE(STRICT_FMUL_VL)
19911 NODE_NAME_CASE(STRICT_FDIV_VL)
19912 NODE_NAME_CASE(STRICT_FSQRT_VL)
19913 NODE_NAME_CASE(STRICT_VFMADD_VL)
19914 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19915 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19916 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19917 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19918 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19919 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19920 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19921 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19922 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19923 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19924 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19925 NODE_NAME_CASE(STRICT_FSETCC_VL)
19926 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19927 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19928 NODE_NAME_CASE(VWMUL_VL)
19929 NODE_NAME_CASE(VWMULU_VL)
19930 NODE_NAME_CASE(VWMULSU_VL)
19931 NODE_NAME_CASE(VWADD_VL)
19932 NODE_NAME_CASE(VWADDU_VL)
19933 NODE_NAME_CASE(VWSUB_VL)
19934 NODE_NAME_CASE(VWSUBU_VL)
19935 NODE_NAME_CASE(VWADD_W_VL)
19936 NODE_NAME_CASE(VWADDU_W_VL)
19937 NODE_NAME_CASE(VWSUB_W_VL)
19938 NODE_NAME_CASE(VWSUBU_W_VL)
19939 NODE_NAME_CASE(VWSLL_VL)
19940 NODE_NAME_CASE(VFWMUL_VL)
19941 NODE_NAME_CASE(VFWADD_VL)
19942 NODE_NAME_CASE(VFWSUB_VL)
19943 NODE_NAME_CASE(VFWADD_W_VL)
19944 NODE_NAME_CASE(VFWSUB_W_VL)
19945 NODE_NAME_CASE(VWMACC_VL)
19946 NODE_NAME_CASE(VWMACCU_VL)
19947 NODE_NAME_CASE(VWMACCSU_VL)
19948 NODE_NAME_CASE(VNSRL_VL)
19949 NODE_NAME_CASE(SETCC_VL)
19950 NODE_NAME_CASE(VMERGE_VL)
19951 NODE_NAME_CASE(VMAND_VL)
19952 NODE_NAME_CASE(VMOR_VL)
19953 NODE_NAME_CASE(VMXOR_VL)
19954 NODE_NAME_CASE(VMCLR_VL)
19955 NODE_NAME_CASE(VMSET_VL)
19956 NODE_NAME_CASE(VRGATHER_VX_VL)
19957 NODE_NAME_CASE(VRGATHER_VV_VL)
19958 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19959 NODE_NAME_CASE(VSEXT_VL)
19960 NODE_NAME_CASE(VZEXT_VL)
19961 NODE_NAME_CASE(VCPOP_VL)
19962 NODE_NAME_CASE(VFIRST_VL)
19963 NODE_NAME_CASE(READ_CSR)
19964 NODE_NAME_CASE(WRITE_CSR)
19965 NODE_NAME_CASE(SWAP_CSR)
19966 NODE_NAME_CASE(CZERO_EQZ)
19967 NODE_NAME_CASE(CZERO_NEZ)
19968 NODE_NAME_CASE(SF_VC_XV_SE)
19969 NODE_NAME_CASE(SF_VC_IV_SE)
19970 NODE_NAME_CASE(SF_VC_VV_SE)
19971 NODE_NAME_CASE(SF_VC_FV_SE)
19972 NODE_NAME_CASE(SF_VC_XVV_SE)
19973 NODE_NAME_CASE(SF_VC_IVV_SE)
19974 NODE_NAME_CASE(SF_VC_VVV_SE)
19975 NODE_NAME_CASE(SF_VC_FVV_SE)
19976 NODE_NAME_CASE(SF_VC_XVW_SE)
19977 NODE_NAME_CASE(SF_VC_IVW_SE)
19978 NODE_NAME_CASE(SF_VC_VVW_SE)
19979 NODE_NAME_CASE(SF_VC_FVW_SE)
19980 NODE_NAME_CASE(SF_VC_V_X_SE)
19981 NODE_NAME_CASE(SF_VC_V_I_SE)
19982 NODE_NAME_CASE(SF_VC_V_XV_SE)
19983 NODE_NAME_CASE(SF_VC_V_IV_SE)
19984 NODE_NAME_CASE(SF_VC_V_VV_SE)
19985 NODE_NAME_CASE(SF_VC_V_FV_SE)
19986 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19987 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19988 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19989 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19990 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19991 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19992 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19993 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19994 }
19995 // clang-format on
19996 return nullptr;
19997#undef NODE_NAME_CASE
19998}
19999
20000/// getConstraintType - Given a constraint letter, return the type of
20001/// constraint it is for this target.
20002RISCVTargetLowering::ConstraintType
20003RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20004 if (Constraint.size() == 1) {
20005 switch (Constraint[0]) {
20006 default:
20007 break;
20008 case 'f':
20009 return C_RegisterClass;
20010 case 'I':
20011 case 'J':
20012 case 'K':
20013 return C_Immediate;
20014 case 'A':
20015 return C_Memory;
20016 case 's':
20017 case 'S': // A symbolic address
20018 return C_Other;
20019 }
20020 } else {
20021 if (Constraint == "vr" || Constraint == "vm")
20022 return C_RegisterClass;
20023 }
20024 return TargetLowering::getConstraintType(Constraint);
20025}
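// The 'A' constraint above is typically used for the address operand of atomic
// or LR/SC inline assembly, e.g. (illustrative C, not from this file):
//   __asm__ volatile("amoswap.w %0, %2, %1"
//                    : "=r"(old), "+A"(*lock) : "r"(1) : "memory");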
20026
20027std::pair<unsigned, const TargetRegisterClass *>
20028RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20029 StringRef Constraint,
20030 MVT VT) const {
20031 // First, see if this is a constraint that directly corresponds to a RISC-V
20032 // register class.
20033 if (Constraint.size() == 1) {
20034 switch (Constraint[0]) {
20035 case 'r':
20036 // TODO: Support fixed vectors up to XLen for P extension?
20037 if (VT.isVector())
20038 break;
20039 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20040 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20041 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20042 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20043 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20044 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20045 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20046 case 'f':
20047 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20048 return std::make_pair(0U, &RISCV::FPR16RegClass);
20049 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20050 return std::make_pair(0U, &RISCV::FPR32RegClass);
20051 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20052 return std::make_pair(0U, &RISCV::FPR64RegClass);
20053 break;
20054 default:
20055 break;
20056 }
20057 } else if (Constraint == "vr") {
20058 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20059 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20060 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20061 return std::make_pair(0U, RC);
20062 }
20063 } else if (Constraint == "vm") {
20064 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20065 return std::make_pair(0U, &RISCV::VMV0RegClass);
20066 }
20067
20068 // Clang will correctly decode the usage of register name aliases into their
20069 // official names. However, other frontends like `rustc` do not. This allows
20070 // users of these frontends to use the ABI names for registers in LLVM-style
20071 // register constraints.
20072 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20073 .Case("{zero}", RISCV::X0)
20074 .Case("{ra}", RISCV::X1)
20075 .Case("{sp}", RISCV::X2)
20076 .Case("{gp}", RISCV::X3)
20077 .Case("{tp}", RISCV::X4)
20078 .Case("{t0}", RISCV::X5)
20079 .Case("{t1}", RISCV::X6)
20080 .Case("{t2}", RISCV::X7)
20081 .Cases("{s0}", "{fp}", RISCV::X8)
20082 .Case("{s1}", RISCV::X9)
20083 .Case("{a0}", RISCV::X10)
20084 .Case("{a1}", RISCV::X11)
20085 .Case("{a2}", RISCV::X12)
20086 .Case("{a3}", RISCV::X13)
20087 .Case("{a4}", RISCV::X14)
20088 .Case("{a5}", RISCV::X15)
20089 .Case("{a6}", RISCV::X16)
20090 .Case("{a7}", RISCV::X17)
20091 .Case("{s2}", RISCV::X18)
20092 .Case("{s3}", RISCV::X19)
20093 .Case("{s4}", RISCV::X20)
20094 .Case("{s5}", RISCV::X21)
20095 .Case("{s6}", RISCV::X22)
20096 .Case("{s7}", RISCV::X23)
20097 .Case("{s8}", RISCV::X24)
20098 .Case("{s9}", RISCV::X25)
20099 .Case("{s10}", RISCV::X26)
20100 .Case("{s11}", RISCV::X27)
20101 .Case("{t3}", RISCV::X28)
20102 .Case("{t4}", RISCV::X29)
20103 .Case("{t5}", RISCV::X30)
20104 .Case("{t6}", RISCV::X31)
20105 .Default(RISCV::NoRegister);
20106 if (XRegFromAlias != RISCV::NoRegister)
20107 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20108
20109 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20110 // TableGen record rather than the AsmName to choose registers for InlineAsm
20111 // constraints, plus we want to match those names to the widest floating point
20112 // register type available, manually select floating point registers here.
20113 //
20114 // The second case is the ABI name of the register, so that frontends can also
20115 // use the ABI names in register constraint lists.
20116 if (Subtarget.hasStdExtF()) {
20117 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20118 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20119 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20120 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20121 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20122 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20123 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20124 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20125 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20126 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20127 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20128 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20129 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20130 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20131 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20132 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20133 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20134 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20135 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20136 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20137 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20138 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20139 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20140 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20141 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20142 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20143 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20144 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20145 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20146 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20147 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20148 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20149 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20150 .Default(RISCV::NoRegister);
20151 if (FReg != RISCV::NoRegister) {
20152 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20153 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20154 unsigned RegNo = FReg - RISCV::F0_F;
20155 unsigned DReg = RISCV::F0_D + RegNo;
20156 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20157 }
20158 if (VT == MVT::f32 || VT == MVT::Other)
20159 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20160 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20161 unsigned RegNo = FReg - RISCV::F0_F;
20162 unsigned HReg = RISCV::F0_H + RegNo;
20163 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20164 }
20165 }
20166 }
20167
20168 if (Subtarget.hasVInstructions()) {
20169 Register VReg = StringSwitch<Register>(Constraint.lower())
20170 .Case("{v0}", RISCV::V0)
20171 .Case("{v1}", RISCV::V1)
20172 .Case("{v2}", RISCV::V2)
20173 .Case("{v3}", RISCV::V3)
20174 .Case("{v4}", RISCV::V4)
20175 .Case("{v5}", RISCV::V5)
20176 .Case("{v6}", RISCV::V6)
20177 .Case("{v7}", RISCV::V7)
20178 .Case("{v8}", RISCV::V8)
20179 .Case("{v9}", RISCV::V9)
20180 .Case("{v10}", RISCV::V10)
20181 .Case("{v11}", RISCV::V11)
20182 .Case("{v12}", RISCV::V12)
20183 .Case("{v13}", RISCV::V13)
20184 .Case("{v14}", RISCV::V14)
20185 .Case("{v15}", RISCV::V15)
20186 .Case("{v16}", RISCV::V16)
20187 .Case("{v17}", RISCV::V17)
20188 .Case("{v18}", RISCV::V18)
20189 .Case("{v19}", RISCV::V19)
20190 .Case("{v20}", RISCV::V20)
20191 .Case("{v21}", RISCV::V21)
20192 .Case("{v22}", RISCV::V22)
20193 .Case("{v23}", RISCV::V23)
20194 .Case("{v24}", RISCV::V24)
20195 .Case("{v25}", RISCV::V25)
20196 .Case("{v26}", RISCV::V26)
20197 .Case("{v27}", RISCV::V27)
20198 .Case("{v28}", RISCV::V28)
20199 .Case("{v29}", RISCV::V29)
20200 .Case("{v30}", RISCV::V30)
20201 .Case("{v31}", RISCV::V31)
20202 .Default(RISCV::NoRegister);
20203 if (VReg != RISCV::NoRegister) {
20204 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20205 return std::make_pair(VReg, &RISCV::VMRegClass);
20206 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20207 return std::make_pair(VReg, &RISCV::VRRegClass);
20208 for (const auto *RC :
20209 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20210 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20211 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20212 return std::make_pair(VReg, RC);
20213 }
20214 }
20215 }
20216 }
20217
20218 std::pair<Register, const TargetRegisterClass *> Res =
20220
20221 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20222 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20223 // Subtarget into account.
20224 if (Res.second == &RISCV::GPRF16RegClass ||
20225 Res.second == &RISCV::GPRF32RegClass ||
20226 Res.second == &RISCV::GPRPairRegClass)
20227 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20228
20229 return Res;
20230}
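// For example, an operand or clobber written with an ABI name such as "{a0}"
// or "{fs0}" resolves here to X10 or F8_F (possibly widened to F8_D), even
// though the TableGen record names differ, which is what lets frontends such
// as rustc use ABI register names directly. (Illustrative annotation.)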
20231
20232InlineAsm::ConstraintCode
20233RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20234 // Currently only support length 1 constraints.
20235 if (ConstraintCode.size() == 1) {
20236 switch (ConstraintCode[0]) {
20237 case 'A':
20238 return InlineAsm::ConstraintCode::A;
20239 default:
20240 break;
20241 }
20242 }
20243
20244 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20245}
20246
20247void RISCVTargetLowering::LowerAsmOperandForConstraint(
20248 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20249 SelectionDAG &DAG) const {
20250 // Currently only support length 1 constraints.
20251 if (Constraint.size() == 1) {
20252 switch (Constraint[0]) {
20253 case 'I':
20254 // Validate & create a 12-bit signed immediate operand.
20255 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20256 uint64_t CVal = C->getSExtValue();
20257 if (isInt<12>(CVal))
20258 Ops.push_back(
20259 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20260 }
20261 return;
20262 case 'J':
20263 // Validate & create an integer zero operand.
20264 if (isNullConstant(Op))
20265 Ops.push_back(
20266 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20267 return;
20268 case 'K':
20269 // Validate & create a 5-bit unsigned immediate operand.
20270 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20271 uint64_t CVal = C->getZExtValue();
20272 if (isUInt<5>(CVal))
20273 Ops.push_back(
20274 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20275 }
20276 return;
20277 case 'S':
20279 return;
20280 default:
20281 break;
20282 }
20283 }
20284 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20285}
20286
20287Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20288 Instruction *Inst,
20289 AtomicOrdering Ord) const {
20290 if (Subtarget.hasStdExtZtso()) {
20291 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20292 return Builder.CreateFence(Ord);
20293 return nullptr;
20294 }
20295
20296 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20297 return Builder.CreateFence(Ord);
20298 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20299 return Builder.CreateFence(AtomicOrdering::Release);
20300 return nullptr;
20301}
20302
20303Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20304 Instruction *Inst,
20305 AtomicOrdering Ord) const {
20306 if (Subtarget.hasStdExtZtso()) {
20307 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20308 return Builder.CreateFence(Ord);
20309 return nullptr;
20310 }
20311
20312 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20313 return Builder.CreateFence(AtomicOrdering::Acquire);
20314 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20315 Ord == AtomicOrdering::SequentiallyConsistent)
20316 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20317 return nullptr;
20318}
20319
20320TargetLowering::AtomicExpansionKind
20321RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20322 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20323 // point operations can't be used in an lr/sc sequence without breaking the
20324 // forward-progress guarantee.
20325 if (AI->isFloatingPointOperation() ||
20326 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20327 AI->getOperation() == AtomicRMWInst::UDecWrap)
20328 return AtomicExpansionKind::CmpXChg;
20329
20330 // Don't expand forced atomics, we want to have __sync libcalls instead.
20331 if (Subtarget.hasForcedAtomics())
20332 return AtomicExpansionKind::None;
20333
20334 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20335 if (AI->getOperation() == AtomicRMWInst::Nand) {
20336 if (Subtarget.hasStdExtZacas() &&
20337 (Size >= 32 || Subtarget.hasStdExtZabha()))
20338 return AtomicExpansionKind::CmpXChg;
20339 if (Size < 32)
20340 return AtomicExpansionKind::MaskedIntrinsic;
20341 }
20342
20343 if (Size < 32 && !Subtarget.hasStdExtZabha())
20344 return AtomicExpansionKind::MaskedIntrinsic;
20345
20346 return AtomicExpansionKind::None;
20347}
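// Illustrative summary of the expansion policy above -- a rough sketch only,
// assuming a plain RV64 target with the A extension but without Zacas/Zabha:
//   atomicrmw fadd ptr %p, float 1.0 seq_cst  -> CmpXChg (FP never uses LR/SC)
//   atomicrmw nand ptr %p, i8 1 seq_cst       -> MaskedIntrinsic (sub-word)
//   atomicrmw add  ptr %p, i64 1 seq_cst      -> None (lowers to a native AMO)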
20348
20349static Intrinsic::ID
20350getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20351 if (XLen == 32) {
20352 switch (BinOp) {
20353 default:
20354 llvm_unreachable("Unexpected AtomicRMW BinOp");
20355 case AtomicRMWInst::Xchg:
20356 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20357 case AtomicRMWInst::Add:
20358 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20359 case AtomicRMWInst::Sub:
20360 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20361 case AtomicRMWInst::Nand:
20362 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20363 case AtomicRMWInst::Max:
20364 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20365 case AtomicRMWInst::Min:
20366 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20367 case AtomicRMWInst::UMax:
20368 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20369 case AtomicRMWInst::UMin:
20370 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20371 }
20372 }
20373
20374 if (XLen == 64) {
20375 switch (BinOp) {
20376 default:
20377 llvm_unreachable("Unexpected AtomicRMW BinOp");
20378 case AtomicRMWInst::Xchg:
20379 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20380 case AtomicRMWInst::Add:
20381 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20382 case AtomicRMWInst::Sub:
20383 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20384 case AtomicRMWInst::Nand:
20385 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20386 case AtomicRMWInst::Max:
20387 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20388 case AtomicRMWInst::Min:
20389 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20390 case AtomicRMWInst::UMax:
20391 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20392 case AtomicRMWInst::UMin:
20393 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20394 }
20395 }
20396
20397 llvm_unreachable("Unexpected XLen\n");
20398}
20399
20400Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20401 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20402 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20403 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20404 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20405 // mask, as this produces better code than the LR/SC loop emitted by
20406 // int_riscv_masked_atomicrmw_xchg.
20407 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20408 isa<ConstantInt>(AI->getValOperand())) {
20409 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20410 if (CVal->isZero())
20411 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20412 Builder.CreateNot(Mask, "Inv_Mask"),
20413 AI->getAlign(), Ord);
20414 if (CVal->isMinusOne())
20415 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20416 AI->getAlign(), Ord);
20417 }
20418
20419 unsigned XLen = Subtarget.getXLen();
20420 Value *Ordering =
20421 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20422 Type *Tys[] = {AlignedAddr->getType()};
20423 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20424 AI->getModule(),
20425 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20426
20427 if (XLen == 64) {
20428 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20429 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20430 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20431 }
20432
20433 Value *Result;
20434
20435 // Must pass the shift amount needed to sign extend the loaded value prior
20436 // to performing a signed comparison for min/max. ShiftAmt is the number of
20437 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20438 // is the number of bits to left+right shift the value in order to
20439 // sign-extend.
20440 if (AI->getOperation() == AtomicRMWInst::Min ||
20441 AI->getOperation() == AtomicRMWInst::Max) {
20442 const DataLayout &DL = AI->getModule()->getDataLayout();
20443 unsigned ValWidth =
20444 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20445 Value *SextShamt =
20446 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20447 Result = Builder.CreateCall(LrwOpScwLoop,
20448 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20449 } else {
20450 Result =
20451 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20452 }
20453
20454 if (XLen == 64)
20455 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20456 return Result;
20457}
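// Worked example of the SextShamt computation above (illustrative, assuming
// RV32, i.e. XLen == 32, and an i8 field that the masked expansion placed at
// bit offset 16, so ShiftAmt == 16 and ValWidth == 8):
//   SextShamt = XLen - ValWidth - ShiftAmt = 32 - 8 - 16 = 8
// The LR/SC loop can then shift the loaded word left by 8 so the field's sign
// bit reaches bit 31, and shift it arithmetically right by 8 to sign-extend
// the field in place before the signed min/max comparison.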
20458
20459TargetLowering::AtomicExpansionKind
20460RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20461 AtomicCmpXchgInst *CI) const {
20462 // Don't expand forced atomics, we want to have __sync libcalls instead.
20463 if (Subtarget.hasForcedAtomics())
20464 return AtomicExpansionKind::None;
20465
20466 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20467 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20468 (Size == 8 || Size == 16))
20469 return AtomicExpansionKind::MaskedIntrinsic;
20470 return AtomicExpansionKind::None;
20471}
20472
20473Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20474 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20475 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20476 unsigned XLen = Subtarget.getXLen();
20477 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20478 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20479 if (XLen == 64) {
20480 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20481 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20482 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20483 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20484 }
20485 Type *Tys[] = {AlignedAddr->getType()};
20486 Function *MaskedCmpXchg =
20487 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20488 Value *Result = Builder.CreateCall(
20489 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20490 if (XLen == 64)
20491 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20492 return Result;
20493}
20494
20495bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20496 EVT DataVT) const {
20497 // We have indexed loads for all supported EEW types. Indices are always
20498 // zero extended.
20499 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20500 isTypeLegal(Extend.getValueType()) &&
20501 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20502 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20503}
20504
20505bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20506 EVT VT) const {
20507 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20508 return false;
20509
20510 switch (FPVT.getSimpleVT().SimpleTy) {
20511 case MVT::f16:
20512 return Subtarget.hasStdExtZfhmin();
20513 case MVT::f32:
20514 return Subtarget.hasStdExtF();
20515 case MVT::f64:
20516 return Subtarget.hasStdExtD();
20517 default:
20518 return false;
20519 }
20520}
20521
20522unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20523 // If we are using the small code model, we can reduce size of jump table
20524 // entry to 4 bytes.
20525 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20526 getTargetMachine().getCodeModel() == CodeModel::Small) {
20527 return MachineJumpTableInfo::EK_Custom32;
20528 }
20529 return TargetLowering::getJumpTableEncoding();
20530}
20531
20532const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20533 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20534 unsigned uid, MCContext &Ctx) const {
20535 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20537 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20538}
20539
20540bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20541 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20542 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20543 // a power of two as well.
20544 // FIXME: This doesn't work for zve32, but that's already broken
20545 // elsewhere for the same reason.
20546 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20547 static_assert(RISCV::RVVBitsPerBlock == 64,
20548 "RVVBitsPerBlock changed, audit needed");
20549 return true;
20550}
20551
20552bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20553 SDValue &Offset,
20554 ISD::MemIndexedMode &AM,
20555 SelectionDAG &DAG) const {
20556 // Target does not support indexed loads.
20557 if (!Subtarget.hasVendorXTHeadMemIdx())
20558 return false;
20559
20560 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20561 return false;
20562
20563 Base = Op->getOperand(0);
20564 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20565 int64_t RHSC = RHS->getSExtValue();
20566 if (Op->getOpcode() == ISD::SUB)
20567 RHSC = -(uint64_t)RHSC;
20568
20569 // The constants that can be encoded in the THeadMemIdx instructions
20570 // are of the form (sign_extend(imm5) << imm2).
20571 bool isLegalIndexedOffset = false;
20572 for (unsigned i = 0; i < 4; i++)
20573 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20574 isLegalIndexedOffset = true;
20575 break;
20576 }
20577
20578 if (!isLegalIndexedOffset)
20579 return false;
20580
20581 Offset = Op->getOperand(1);
20582 return true;
20583 }
20584
20585 return false;
20586}
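// Illustrative examples of offsets accepted by the (sign_extend(imm5) << imm2)
// check above, assuming XTHeadMemIdx is enabled:
//   48  = 6 << 3   -> legal (6 fits in a signed 5-bit immediate)
//   -64 = -8 << 3  -> legal
//   33             -> not legal (odd, and too large for imm5 with shift 0)
//   128 = 16 << 3  -> not legal (16 does not fit in a signed 5-bit immediate)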
20587
20588bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20589 SDValue &Offset,
20590 ISD::MemIndexedMode &AM,
20591 SelectionDAG &DAG) const {
20592 EVT VT;
20593 SDValue Ptr;
20594 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20595 VT = LD->getMemoryVT();
20596 Ptr = LD->getBasePtr();
20597 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20598 VT = ST->getMemoryVT();
20599 Ptr = ST->getBasePtr();
20600 } else
20601 return false;
20602
20603 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20604 return false;
20605
20606 AM = ISD::PRE_INC;
20607 return true;
20608}
20609
20610bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
20611 SDValue &Base,
20612 SDValue &Offset,
20613 ISD::MemIndexedMode &AM,
20614 SelectionDAG &DAG) const {
20615 EVT VT;
20616 SDValue Ptr;
20617 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20618 VT = LD->getMemoryVT();
20619 Ptr = LD->getBasePtr();
20620 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20621 VT = ST->getMemoryVT();
20622 Ptr = ST->getBasePtr();
20623 } else
20624 return false;
20625
20626 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20627 return false;
20628 // Post-indexing updates the base, so it's not a valid transform
20629 // if that's not the same as the load's pointer.
20630 if (Ptr != Base)
20631 return false;
20632
20633 AM = ISD::POST_INC;
20634 return true;
20635}
20636
20637bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20638 EVT VT) const {
20639 EVT SVT = VT.getScalarType();
20640
20641 if (!SVT.isSimple())
20642 return false;
20643
20644 switch (SVT.getSimpleVT().SimpleTy) {
20645 case MVT::f16:
20646 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20647 : Subtarget.hasStdExtZfhOrZhinx();
20648 case MVT::f32:
20649 return Subtarget.hasStdExtFOrZfinx();
20650 case MVT::f64:
20651 return Subtarget.hasStdExtDOrZdinx();
20652 default:
20653 break;
20654 }
20655
20656 return false;
20657}
20658
20659ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20660 // Zacas will use amocas.w which does not require extension.
20661 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20662}
20663
20664Register RISCVTargetLowering::getExceptionPointerRegister(
20665 const Constant *PersonalityFn) const {
20666 return RISCV::X10;
20667}
20668
20669Register RISCVTargetLowering::getExceptionSelectorRegister(
20670 const Constant *PersonalityFn) const {
20671 return RISCV::X11;
20672}
20673
20674bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20675 // Return false to suppress the unnecessary extensions if the LibCall
20676 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20677 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20678 Type.getSizeInBits() < Subtarget.getXLen()))
20679 return false;
20680
20681 return true;
20682}
20683
20685 if (Subtarget.is64Bit() && Type == MVT::i32)
20686 return true;
20687
20688 return IsSigned;
20689}
20690
20691bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20692 SDValue C) const {
20693 // Check integral scalar types.
20694 const bool HasExtMOrZmmul =
20695 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20696 if (!VT.isScalarInteger())
20697 return false;
20698
20699 // Omit the optimization if the sub target has the M extension and the data
20700 // size exceeds XLen.
20701 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20702 return false;
20703
20704 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20705 // Break the MUL to a SLLI and an ADD/SUB.
20706 const APInt &Imm = ConstNode->getAPIntValue();
20707 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20708 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20709 return true;
20710
20711 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20712 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20713 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20714 (Imm - 8).isPowerOf2()))
20715 return true;
20716
20717 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20718 // a pair of LUI/ADDI.
20719 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20720 ConstNode->hasOneUse()) {
20721 APInt ImmS = Imm.ashr(Imm.countr_zero());
20722 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20723 (1 - ImmS).isPowerOf2())
20724 return true;
20725 }
20726 }
20727
20728 return false;
20729}
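// Rough examples of multiplications the check above lets the combiner break
// up (illustrative only; the actual instruction selection happens elsewhere):
//   x * 17   -> (x << 4) + x         (Imm - 1 is a power of two)
//   x * 4100 -> sh2add x, (x << 12)  (with Zba; Imm - 4 is a power of two
//                                     and 4100 is not a simm12)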
20730
20732 SDValue ConstNode) const {
20733 // Let the DAGCombiner decide for vectors.
20734 EVT VT = AddNode.getValueType();
20735 if (VT.isVector())
20736 return true;
20737
20738 // Let the DAGCombiner decide for larger types.
20739 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20740 return true;
20741
20742 // It is worse if c1 is simm12 while c1*c2 is not.
20743 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20744 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20745 const APInt &C1 = C1Node->getAPIntValue();
20746 const APInt &C2 = C2Node->getAPIntValue();
20747 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20748 return false;
20749
20750 // Default to true and let the DAGCombiner decide.
20751 return true;
20752}
20753
20754bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20755 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20756 unsigned *Fast) const {
20757 if (!VT.isVector()) {
20758 if (Fast)
20759 *Fast = Subtarget.enableUnalignedScalarMem();
20760 return Subtarget.enableUnalignedScalarMem();
20761 }
20762
20763 // All vector implementations must support element alignment
20764 EVT ElemVT = VT.getVectorElementType();
20765 if (Alignment >= ElemVT.getStoreSize()) {
20766 if (Fast)
20767 *Fast = 1;
20768 return true;
20769 }
20770
20771 // Note: We lower an unmasked unaligned vector access to an equally sized
20772 // e8 element type access. Given this, we effectively support all unmasked
20773 // misaligned accesses. TODO: Work through the codegen implications of
20774 // allowing such accesses to be formed, and considered fast.
20775 if (Fast)
20776 *Fast = Subtarget.enableUnalignedVectorMem();
20777 return Subtarget.enableUnalignedVectorMem();
20778}
20779
20780
20781EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20782 const AttributeList &FuncAttributes) const {
20783 if (!Subtarget.hasVInstructions())
20784 return MVT::Other;
20785
20786 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20787 return MVT::Other;
20788
20789 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20790 // has an expansion threshold, and we want the number of hardware memory
20791 // operations to correspond roughly to that threshold. LMUL>1 operations
20792 // are typically expanded linearly internally, and thus correspond to more
20793 // than one actual memory operation. Note that store merging and load
20794 // combining will typically form larger LMUL operations from the LMUL1
20795 // operations emitted here, and that's okay because combining isn't
20796 // introducing new memory operations; it's just merging existing ones.
20797 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20798 if (Op.size() < MinVLenInBytes)
20799 // TODO: Figure out short memops. For the moment, do the default thing
20800 // which ends up using scalar sequences.
20801 return MVT::Other;
20802
20803 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20804 // a large scalar constant and instead use vmv.v.x/i to do the
20805 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20806 // maximize the chance we can encode the size in the vsetvli.
20807 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20808 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20809
20810 // Do we have sufficient alignment for our preferred VT? If not, revert
20811 // to largest size allowed by our alignment criteria.
20812 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20813 Align RequiredAlign(PreferredVT.getStoreSize());
20814 if (Op.isFixedDstAlign())
20815 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20816 if (Op.isMemcpy())
20817 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20818 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20819 }
20820 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20821}
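// Illustrative results of the selection above, assuming VLEN == 128 (so
// MinVLenInBytes == 16) and ELEN == 64:
//   32-byte memcpy                   -> v2i64 (ELenVT lanes, an LMUL1 op)
//   32-byte memset to a nonzero byte -> v16i8 (so vmv.v.x can broadcast)
//   8-byte copy                      -> MVT::Other (falls back to scalar code)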
20822
20823bool RISCVTargetLowering::splitValueIntoRegisterParts(
20824 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20825 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20826 bool IsABIRegCopy = CC.has_value();
20827 EVT ValueVT = Val.getValueType();
20828 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20829 PartVT == MVT::f32) {
20830 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20831 // nan, and cast to f32.
20832 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20833 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20834 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20835 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20836 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20837 Parts[0] = Val;
20838 return true;
20839 }
20840
20841 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20842 LLVMContext &Context = *DAG.getContext();
20843 EVT ValueEltVT = ValueVT.getVectorElementType();
20844 EVT PartEltVT = PartVT.getVectorElementType();
20845 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20846 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20847 if (PartVTBitSize % ValueVTBitSize == 0) {
20848 assert(PartVTBitSize >= ValueVTBitSize);
20849 // If the element types are different, bitcast to the same element type of
20850 // PartVT first.
20851 // For example, to copy a <vscale x 1 x i8> value into
20852 // <vscale x 4 x i16>:
20853 // we first widen the <vscale x 1 x i8> to <vscale x 8 x i8> with an insert
20854 // subvector, and can then bitcast the result to <vscale x 4 x i16>.
20855 if (ValueEltVT != PartEltVT) {
20856 if (PartVTBitSize > ValueVTBitSize) {
20857 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20858 assert(Count != 0 && "The number of element should not be zero.");
20859 EVT SameEltTypeVT =
20860 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20861 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20862 DAG.getUNDEF(SameEltTypeVT), Val,
20863 DAG.getVectorIdxConstant(0, DL));
20864 }
20865 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20866 } else {
20867 Val =
20868 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20869 Val, DAG.getVectorIdxConstant(0, DL));
20870 }
20871 Parts[0] = Val;
20872 return true;
20873 }
20874 }
20875 return false;
20876}
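// Illustrative example of the f16/bf16 NaN-boxing path above: an f16 1.0 has
// the bit pattern 0x3C00; when passed in an FPR as an f32 ABI value it becomes
// 0xFFFF3C00, i.e. the half value padded with ones so the result is a valid
// boxed f32 NaN payload. joinRegisterPartsIntoValue below simply truncates the
// low 16 bits back out.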
20877
20878SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20879 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20880 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20881 bool IsABIRegCopy = CC.has_value();
20882 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20883 PartVT == MVT::f32) {
20884 SDValue Val = Parts[0];
20885
20886 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20887 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20888 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20889 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20890 return Val;
20891 }
20892
20893 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20894 LLVMContext &Context = *DAG.getContext();
20895 SDValue Val = Parts[0];
20896 EVT ValueEltVT = ValueVT.getVectorElementType();
20897 EVT PartEltVT = PartVT.getVectorElementType();
20898 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20899 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20900 if (PartVTBitSize % ValueVTBitSize == 0) {
20901 assert(PartVTBitSize >= ValueVTBitSize);
20902 EVT SameEltTypeVT = ValueVT;
20903 // If the element types are different, convert it to the same element type
20904 // of PartVT.
20905 // For example, to copy a <vscale x 1 x i8> value out of a
20906 // <vscale x 4 x i16>:
20907 // we first bitcast the <vscale x 4 x i16> to <vscale x 8 x i8>, and can
20908 // then extract the <vscale x 1 x i8> subvector.
20909 if (ValueEltVT != PartEltVT) {
20910 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20911 assert(Count != 0 && "The number of element should not be zero.");
20912 SameEltTypeVT =
20913 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20914 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20915 }
20916 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20917 DAG.getVectorIdxConstant(0, DL));
20918 return Val;
20919 }
20920 }
20921 return SDValue();
20922}
20923
20924bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20925 // When aggressively optimizing for code size, we prefer to use a div
20926 // instruction, as it is usually smaller than the alternative sequence.
20927 // TODO: Add vector division?
20928 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20929 return OptSize && !VT.isVector();
20930}
20931
20932bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20933 // Scalarizing zero_ext and sign_ext can prevent matching them to widening
20934 // instructions in some situations.
20935 unsigned Opc = N->getOpcode();
20936 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20937 return false;
20938 return true;
20939}
20940
20941static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20942 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20943 Function *ThreadPointerFunc =
20944 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20945 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20946 IRB.CreateCall(ThreadPointerFunc), Offset);
20947}
20948
20949Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20950 // Fuchsia provides a fixed TLS slot for the stack cookie.
20951 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20952 if (Subtarget.isTargetFuchsia())
20953 return useTpOffset(IRB, -0x10);
20954
20955 return TargetLowering::getIRStackGuard(IRB);
20956}
20957
20958bool RISCVTargetLowering::isLegalInterleavedAccessType(
20959 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20960 const DataLayout &DL) const {
20961 EVT VT = getValueType(DL, VTy);
20962 // Don't lower vlseg/vsseg for vector types that can't be split.
20963 if (!isTypeLegal(VT))
20964 return false;
20965
20966 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
20967 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20968 Alignment))
20969 return false;
20970
20971 MVT ContainerVT = VT.getSimpleVT();
20972
20973 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20974 if (!Subtarget.useRVVForFixedLengthVectors())
20975 return false;
20976 // Sometimes the interleaved access pass picks up splats as interleaves of
20977 // one element. Don't lower these.
20978 if (FVTy->getNumElements() < 2)
20979 return false;
20980
20982 }
20983
20984 // Need to make sure that EMUL * NFIELDS ≤ 8
20985 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20986 if (Fractional)
20987 return true;
20988 return Factor * LMUL <= 8;
20989}
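// Illustrative check of the EMUL * NFIELDS <= 8 rule above: a scalable
// <vscale x 4 x i32> element type occupies LMUL = 2 registers (128 known-min
// bits / 64 bits per block), so Factor 4 gives 4 * 2 = 8 and is accepted,
// while Factor 8 would give 16 and is rejected. Fractional-LMUL types are
// accepted unconditionally by the early return above.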
20990
20991bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20992 Align Alignment) const {
20993 if (!Subtarget.hasVInstructions())
20994 return false;
20995
20996 // Only support fixed vectors if we know the minimum vector size.
20997 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20998 return false;
20999
21000 EVT ScalarType = DataType.getScalarType();
21001 if (!isLegalElementTypeForRVV(ScalarType))
21002 return false;
21003
21004 if (!Subtarget.enableUnalignedVectorMem() &&
21005 Alignment < ScalarType.getStoreSize())
21006 return false;
21007
21008 return true;
21009}
21010
21011static const Intrinsic::ID FixedVlsegIntrIds[] = {
21012 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21013 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21014 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21015 Intrinsic::riscv_seg8_load};
21016
21017/// Lower an interleaved load into a vlsegN intrinsic.
21018///
21019/// E.g. Lower an interleaved load (Factor = 2):
21020/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21021/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21022/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21023///
21024/// Into:
21025/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21026/// %ptr, i64 4)
21027/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21028/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21029bool RISCVTargetLowering::lowerInterleavedLoad(
21030 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21031 ArrayRef<unsigned> Indices, unsigned Factor) const {
21032 IRBuilder<> Builder(LI);
21033
21034 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21035 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21036 LI->getPointerAddressSpace(),
21037 LI->getModule()->getDataLayout())
21038 return false;
21039
21040 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21041
21042 Function *VlsegNFunc =
21043 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21044 {VTy, LI->getPointerOperandType(), XLenTy});
21045
21046 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21047
21048 CallInst *VlsegN =
21049 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21050
21051 for (unsigned i = 0; i < Shuffles.size(); i++) {
21052 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21053 Shuffles[i]->replaceAllUsesWith(SubVec);
21054 }
21055
21056 return true;
21057}
21058
21059static const Intrinsic::ID FixedVssegIntrIds[] = {
21060 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21061 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21062 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21063 Intrinsic::riscv_seg8_store};
21064
21065/// Lower an interleaved store into a vssegN intrinsic.
21066///
21067/// E.g. Lower an interleaved store (Factor = 3):
21068/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21069/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21070/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21071///
21072/// Into:
21073/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21074/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21075/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21076/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21077/// %ptr, i32 4)
21078///
21079/// Note that the new shufflevectors will be removed and we'll only generate one
21080/// vsseg3 instruction in CodeGen.
21081bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
21082 ShuffleVectorInst *SVI,
21083 unsigned Factor) const {
21084 IRBuilder<> Builder(SI);
21085 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21086 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21087 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21088 ShuffleVTy->getNumElements() / Factor);
21089 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21090 SI->getPointerAddressSpace(),
21091 SI->getModule()->getDataLayout()))
21092 return false;
21093
21094 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21095
21096 Function *VssegNFunc =
21097 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21098 {VTy, SI->getPointerOperandType(), XLenTy});
21099
21100 auto Mask = SVI->getShuffleMask();
21102
21103 for (unsigned i = 0; i < Factor; i++) {
21104 Value *Shuffle = Builder.CreateShuffleVector(
21105 SVI->getOperand(0), SVI->getOperand(1),
21106 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21107 Ops.push_back(Shuffle);
21108 }
21109 // This VL should be OK (should be executable in one vsseg instruction,
21110 // potentially under larger LMULs) because we checked that the fixed vector
21111 // type fits in isLegalInterleavedAccessType
21112 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21113 Ops.append({SI->getPointerOperand(), VL});
21114
21115 Builder.CreateCall(VssegNFunc, Ops);
21116
21117 return true;
21118}
21119
21120bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21121 LoadInst *LI) const {
21122 assert(LI->isSimple());
21123 IRBuilder<> Builder(LI);
21124
21125 // Only deinterleave2 supported at present.
21126 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21127 return false;
21128
21129 unsigned Factor = 2;
21130
21131 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21132 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21133
21134 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21136 LI->getModule()->getDataLayout()))
21137 return false;
21138
21139 Function *VlsegNFunc;
21140 Value *VL;
21141 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21143
21144 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21145 VlsegNFunc = Intrinsic::getDeclaration(
21146 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21147 {ResVTy, LI->getPointerOperandType(), XLenTy});
21148 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21149 } else {
21150 static const Intrinsic::ID IntrIds[] = {
21151 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21152 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21153 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21154 Intrinsic::riscv_vlseg8};
21155
21156 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21157 {ResVTy, XLenTy});
21158 VL = Constant::getAllOnesValue(XLenTy);
21159 Ops.append(Factor, PoisonValue::get(ResVTy));
21160 }
21161
21162 Ops.append({LI->getPointerOperand(), VL});
21163
21164 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21165 DI->replaceAllUsesWith(Vlseg);
21166
21167 return true;
21168}
21169
21170bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21171 StoreInst *SI) const {
21172 assert(SI->isSimple());
21173 IRBuilder<> Builder(SI);
21174
21175 // Only interleave2 supported at present.
21176 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21177 return false;
21178
21179 unsigned Factor = 2;
21180
21181 VectorType *VTy = cast<VectorType>(II->getType());
21182 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21183
21184 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21185 SI->getPointerAddressSpace(),
21186 SI->getModule()->getDataLayout()))
21187 return false;
21188
21189 Function *VssegNFunc;
21190 Value *VL;
21191 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21192
21193 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21194 VssegNFunc = Intrinsic::getDeclaration(
21195 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21196 {InVTy, SI->getPointerOperandType(), XLenTy});
21197 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21198 } else {
21199 static const Intrinsic::ID IntrIds[] = {
21200 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21201 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21202 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21203 Intrinsic::riscv_vsseg8};
21204
21205 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21206 {InVTy, XLenTy});
21207 VL = Constant::getAllOnesValue(XLenTy);
21208 }
21209
21210 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21211 SI->getPointerOperand(), VL});
21212
21213 return true;
21214}
21215
21216MachineInstr *
21217RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21218 MachineBasicBlock::instr_iterator &MBBI,
21219 const TargetInstrInfo *TII) const {
21220 assert(MBBI->isCall() && MBBI->getCFIType() &&
21221 "Invalid call instruction for a KCFI check");
21222 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21223 MBBI->getOpcode()));
21224
21225 MachineOperand &Target = MBBI->getOperand(0);
21226 Target.setIsRenamable(false);
21227
21228 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21229 .addReg(Target.getReg())
21230 .addImm(MBBI->getCFIType())
21231 .getInstr();
21232}
21233
21234#define GET_REGISTER_MATCHER
21235#include "RISCVGenAsmMatcher.inc"
21236
21237Register
21238RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21239 const MachineFunction &MF) const {
21240 Register Reg = MatchRegisterAltName(RegName);
21241 if (Reg == RISCV::NoRegister)
21242 Reg = MatchRegisterName(RegName);
21243 if (Reg == RISCV::NoRegister)
21244 report_fatal_error(
21245 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21246 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21247 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21248 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21249 StringRef(RegName) + "\"."));
21250 return Reg;
21251}
21252
21253MachineMemOperand::Flags
21254RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21255 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21256
21257 if (NontemporalInfo == nullptr)
21258 return MachineMemOperand::MONone;
21259
21260 // 1, the default value, works as __RISCV_NTLH_ALL
21261 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21262 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21263 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21264 // 5 -> __RISCV_NTLH_ALL
21265 int NontemporalLevel = 5;
21266 const MDNode *RISCVNontemporalInfo =
21267 I.getMetadata("riscv-nontemporal-domain");
21268 if (RISCVNontemporalInfo != nullptr)
21269 NontemporalLevel =
21270 cast<ConstantInt>(
21271 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21272 ->getValue())
21273 ->getZExtValue();
21274
21275 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21276 "RISC-V target doesn't support this non-temporal domain.");
21277
21278 NontemporalLevel -= 2;
21279 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21280 if (NontemporalLevel & 0b1)
21281 Flags |= MONontemporalBit0;
21282 if (NontemporalLevel & 0b10)
21283 Flags |= MONontemporalBit1;
21284
21285 return Flags;
21286}
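// Quick illustration of the mapping above: "riscv-nontemporal-domain" level 3
// (__RISCV_NTLH_ALL_PRIVATE) becomes 3 - 2 = 1, setting only MONontemporalBit0,
// while the default level 5 (__RISCV_NTLH_ALL) becomes 3, setting both
// MONontemporalBit0 and MONontemporalBit1.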
21287
21288MachineMemOperand::Flags
21289RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21290
21291 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21292 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21293 TargetFlags |= (NodeFlags & MONontemporalBit0);
21294 TargetFlags |= (NodeFlags & MONontemporalBit1);
21295 return TargetFlags;
21296}
21297
21298bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21299 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21300 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21301}
21302
21303bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21304 if (VT.isScalableVector())
21305 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21306 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21307 return true;
21308 return Subtarget.hasStdExtZbb() &&
21309 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21310}
21311
21313 ISD::CondCode Cond) const {
21314 return isCtpopFast(VT) ? 0 : 1;
21315}
21316
21317bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21318
21319 // GISel support is in progress or complete for these opcodes.
21320 unsigned Op = Inst.getOpcode();
21321 if (Op == Instruction::Add || Op == Instruction::Sub ||
21322 Op == Instruction::And || Op == Instruction::Or ||
21323 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21324 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21325 return false;
21326
21327 if (Inst.getType()->isScalableTy())
21328 return true;
21329
21330 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21331 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21332 !isa<ReturnInst>(&Inst))
21333 return true;
21334
21335 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21336 if (AI->getAllocatedType()->isScalableTy())
21337 return true;
21338 }
21339
21340 return false;
21341}
21342
21343SDValue
21344RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21345 SelectionDAG &DAG,
21346 SmallVectorImpl<SDNode *> &Created) const {
21347 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21348 if (isIntDivCheap(N->getValueType(0), Attr))
21349 return SDValue(N, 0); // Lower SDIV as SDIV
21350
21351 // Only perform this transform if short forward branch opt is supported.
21352 if (!Subtarget.hasShortForwardBranchOpt())
21353 return SDValue();
21354 EVT VT = N->getValueType(0);
21355 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21356 return SDValue();
21357
21358 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21359 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21360 return SDValue();
21361 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21362}
21363
21364bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21365 EVT VT, const APInt &AndMask) const {
21366 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21367 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21368 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21369}
21370
21371unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21372 return Subtarget.getMinimumJumpTableEntries();
21373}
21374
21375// Handle single arg such as return value.
21376template <typename Arg>
21377void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21378 // This lambda determines whether an array of types are constructed by
21379 // homogeneous vector types.
21380 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21381 // First, extract the first element in the argument type.
21382 auto It = ArgList.begin();
21383 MVT FirstArgRegType = It->VT;
21384
21385 // Return if there is no return or the type needs split.
21386 if (It == ArgList.end() || It->Flags.isSplit())
21387 return false;
21388
21389 ++It;
21390
21391 // Return if this argument type contains only 1 element, or it's not a
21392 // vector type.
21393 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21394 return false;
21395
21396 // Second, check if the following elements in this argument type are all the
21397 // same.
21398 for (; It != ArgList.end(); ++It)
21399 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21400 return false;
21401
21402 return true;
21403 };
21404
21405 if (isHomogeneousScalableVectorType(ArgList)) {
21406 // Handle as tuple type
21407 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21408 } else {
21409 // Handle as normal vector type
21410 bool FirstVMaskAssigned = false;
21411 for (const auto &OutArg : ArgList) {
21412 MVT RegisterVT = OutArg.VT;
21413
21414 // Skip non-RVV register type
21415 if (!RegisterVT.isVector())
21416 continue;
21417
21418 if (RegisterVT.isFixedLengthVector())
21419 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21420
21421 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21422 RVVArgInfos.push_back({1, RegisterVT, true});
21423 FirstVMaskAssigned = true;
21424 continue;
21425 }
21426
21427 RVVArgInfos.push_back({1, RegisterVT, false});
21428 }
21429 }
21430}
21431
21432// Handle multiple args.
21433template <>
21434void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21435 const DataLayout &DL = MF->getDataLayout();
21436 const Function &F = MF->getFunction();
21437 LLVMContext &Context = F.getContext();
21438
21439 bool FirstVMaskAssigned = false;
21440 for (Type *Ty : TypeList) {
21441 StructType *STy = dyn_cast<StructType>(Ty);
21442 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21443 Type *ElemTy = STy->getTypeAtIndex(0U);
21444 EVT VT = TLI->getValueType(DL, ElemTy);
21445 MVT RegisterVT =
21446 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21447 unsigned NumRegs =
21448 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21449
21450 RVVArgInfos.push_back(
21451 {NumRegs * STy->getNumElements(), RegisterVT, false});
21452 } else {
21453 SmallVector<EVT, 4> ValueVTs;
21454 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21455
21456 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21457 ++Value) {
21458 EVT VT = ValueVTs[Value];
21459 MVT RegisterVT =
21460 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21461 unsigned NumRegs =
21462 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21463
21464 // Skip non-RVV register type
21465 if (!RegisterVT.isVector())
21466 continue;
21467
21468 if (RegisterVT.isFixedLengthVector())
21469 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21470
21471 if (!FirstVMaskAssigned &&
21472 RegisterVT.getVectorElementType() == MVT::i1) {
21473 RVVArgInfos.push_back({1, RegisterVT, true});
21474 FirstVMaskAssigned = true;
21475 --NumRegs;
21476 }
21477
21478 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21479 }
21480 }
21481 }
21482}
21483
21484void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21485 unsigned StartReg) {
21486 assert((StartReg % LMul) == 0 &&
21487 "Start register number should be multiple of lmul");
21488 const MCPhysReg *VRArrays;
21489 switch (LMul) {
21490 default:
21491 report_fatal_error("Invalid lmul");
21492 case 1:
21493 VRArrays = ArgVRs;
21494 break;
21495 case 2:
21496 VRArrays = ArgVRM2s;
21497 break;
21498 case 4:
21499 VRArrays = ArgVRM4s;
21500 break;
21501 case 8:
21502 VRArrays = ArgVRM8s;
21503 break;
21504 }
21505
21506 for (unsigned i = 0; i < NF; ++i)
21507 if (StartReg)
21508 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21509 else
21510 AllocatedPhysRegs.push_back(MCPhysReg());
21511}
21512
21513/// This function determines if each RVV argument is passed by register, if the
21514/// argument can be assigned to a VR, then give it a specific register.
21515/// Otherwise, assign the argument to 0, which is an invalid MCPhysReg.
21516void RVVArgDispatcher::compute() {
21517 uint32_t AssignedMap = 0;
21518 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21519 // Allocate first vector mask argument to V0.
21520 if (ArgInfo.FirstVMask) {
21521 AllocatedPhysRegs.push_back(RISCV::V0);
21522 return;
21523 }
21524
21525 unsigned RegsNeeded = divideCeil(
21526 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21527 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21528 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21529 StartReg += RegsNeeded) {
21530 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21531 if ((AssignedMap & Map) == 0) {
21532 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21533 AssignedMap |= Map;
21534 return;
21535 }
21536 }
21537
21538 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21539 };
21540
21541 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21542 allocate(RVVArgInfos[i]);
21543}
21544
21545MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21546 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21547 return AllocatedPhysRegs[CurIdx++];
21548}
21549
21550namespace llvm::RISCVVIntrinsicsTable {
21551
21552#define GET_RISCVVIntrinsicsTable_IMPL
21553#include "RISCVGenSearchableTables.inc"
21554
21555} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangement...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
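The shape of this helper suggests the classic stage-wise bit permutation used for generalized reverse (grev) and OR-combine (gorc). The following sketch, with a hypothetical name, shows that structure and is not claimed to be the exact body used in this file: each set bit of ShAmt enables one butterfly stage that swaps (or, for GORC, also ORs in) bit groups of the corresponding width.
// Sketch of a stage-wise GREV/GORC over a 64-bit value. Masks[i] selects the
// low half of every 2^(i+1)-bit group.
static uint64_t computeGREVOrGORCSketch(uint64_t X, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Swapped =
          ((X & Masks[Stage]) << Shift) | ((X >> Shift) & Masks[Stage]);
      X = IsGORC ? (X | Swapped) : Swapped;
    }
  }
  return X;
}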
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large proportion...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
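A plausible sketch of what such a helper can look like, assuming the mask type is simply an i1 vector with the same element count; the name is hypothetical and not the file's actual implementation.
// Sketch: an i1 vector LLT with the same element count as VecTy.
static LLT getMaskLLTSketch(LLT VecTy) {
  assert(VecTy.isVector() && "expected a vector type");
  return LLT::vector(VecTy.getElementCount(), LLT::scalar(1));
}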
const SmallVectorImpl< MachineOperand > & Cond
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
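Illustrative use of this factory (hypothetical variable name), producing a negative quiet NaN in single precision:
APFloat NegNaN = APFloat::getNaN(APFloat::IEEEsingle(), /*Negative=*/true);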
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
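A few illustrative (hypothetical) uses of the mask factories listed above, all on 32-bit values:
APInt LowByte  = APInt::getLowBitsSet(32, 8);   // 0x000000FF
APInt HighNib  = APInt::getHighBitsSet(32, 4);  // 0xF0000000
APInt FromBit4 = APInt::getBitsSetFrom(32, 4);  // 0xFFFFFFF0
APInt Bit31    = APInt::getOneBitSet(32, 31);   // 0x80000000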
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
iterator_range< arg_iterator > args()
Definition: Function.h:842
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Argument * getArg(unsigned i) const
Definition: Function.h:836
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
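A hypothetical fragment based on the signature above, assuming an in-scope IRBuilderBase &Builder plus Value *Ptr and Value *Val; it emits a sequentially consistent atomic add:
AtomicRMWInst *RMW = Builder.CreateAtomicRMW(
    AtomicRMWInst::Add, Ptr, Val, MaybeAlign(Align(8)),
    AtomicOrdering::SequentiallyConsistent);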
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat operand...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y --> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
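The "x & (x - 1)" expression clears the lowest set bit, so k such steps decide whether a value has at most k set bits; a standalone illustration with a hypothetical helper name:
// Each step clears the lowest set bit, so two steps test popcount(X) <= 2.
static bool hasAtMostTwoSetBits(uint64_t X) {
  X &= X - 1;
  X &= X - 1;
  return X == 0;
}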
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if this node is an UNDEF value.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
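The SelectionDAG builder methods above are the workhorses of custom lowering. Below is a hedged sketch (not this file's code) that expands a signed absolute value using getConstant, getNegative, getSetCC and getSelect; a real lowering would query getSetCCResultType instead of hard-coding i1.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Sketch: abs(x) as select(x < 0, 0 - x, x), built with the DAG helpers above.
static SDValue expandAbsSketch(SelectionDAG &DAG, SDValue X, const SDLoc &DL) {
  EVT VT = X.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Neg = DAG.getNegative(X, DL, VT);                 // (SUB 0, X)
  // i1 is assumed as the setcc result type to keep the sketch small.
  SDValue IsNeg = DAG.getSetCC(DL, MVT::i1, X, Zero, ISD::SETLT);
  return DAG.getSelect(DL, VT, IsNeg, Neg, X);
}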
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
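The static mask predicates above classify shuffle masks without creating any IR. A small sketch with hand-written masks (the mask values and expected results are illustrative):

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch: classify two hand-written shuffle masks for 4-element sources.
static void shuffleMaskSketch() {
  int Reverse[] = {3, 2, 1, 0};
  bool IsRev = ShuffleVectorInst::isReverseMask(Reverse, /*NumSrcElts=*/4);
  // Expected: IsRev == true (elements of one source in reverse order).

  int Insert[] = {0, 1, 4, 5};
  int NumSubElts = 0, Index = 0;
  bool IsIns = ShuffleVectorInst::isInsertSubvectorMask(Insert, /*NumSrcElts=*/4,
                                                        NumSubElts, Index);
  // Expected: IsIns == true, with a 2-element subvector inserted at index 2.
  (void)IsRev; (void)IsIns;
}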
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
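SmallVector and SmallSet are the standard small, stack-friendly containers used throughout this file. A minimal, self-contained sketch of the common "de-duplicate while preserving order" pattern:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Keep the first occurrence of each value, in input order.
static SmallVector<int, 8> uniqueInOrder(ArrayRef<int> In) {
  SmallSet<int, 8> Seen;
  SmallVector<int, 8> Out;
  Out.reserve(In.size());
  for (int V : In)
    if (Seen.insert(V).second)   // insert() reports whether V was newly added
      Out.push_back(V);
  return Out;
}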
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
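StringSwitch gives a compact way to map string literals to values, which is how string constraints and names are usually dispatched. The enum and strings below are illustrative only:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

using namespace llvm;

enum class ExtKind { F, D, V, Unknown };

// Map an extension name to an enum value, with "v"/"vector" sharing a case.
static ExtKind classifyExt(StringRef Name) {
  return StringSwitch<ExtKind>(Name)
      .Case("f", ExtKind::F)
      .Case("d", ExtKind::D)
      .Cases("v", "vector", ExtKind::V)
      .Default(ExtKind::Unknown);
}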
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
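The TargetLoweringBase hooks above are normally driven from a target's TargetLowering constructor. The sketch below shows the shape of that configuration for a hypothetical target; the class name, operations and actions chosen are illustrative and are not RISC-V's actual settings.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical target: declare which operations need Expand/Custom handling.
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    setOperationAction(ISD::SDIV, MVT::i32, Expand);       // no divider: expand
    setOperationAction(ISD::BR_CC, MVT::i32, Custom);      // custom-lower BR_CC
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Expand);
    setTruncStoreAction(MVT::i32, MVT::i8, Expand);
    setBooleanContents(ZeroOrOneBooleanContent);
    setMinFunctionAlignment(Align(4));
  }
};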
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1346
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1195
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ STRICT_LROUND
Definition: ISDOpcodes.h:432
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:587
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ STRICT_LRINT
Definition: ISDOpcodes.h:434
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:592
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:430
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:637
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:435
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:613
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ STRICT_LLROUND
Definition: ISDOpcodes.h:433
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:581
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1413
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
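The ISD condition-code helpers above are used when combines need to invert or commute comparisons. A small sketch with illustrative values:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Invert and swap an integer SETLT, and test for an equality comparison.
static void condCodeSketch() {
  EVT VT = MVT::i32;
  ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, VT);          // SETGE
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(ISD::SETLT);  // SETGT
  bool IsEq = ISD::isIntEqualitySetCC(ISD::SETEQ);                   // true
  (void)Inv; (void)Swapped; (void)IsEq;
}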
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:593
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
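The RISCVVType helpers above encode and decode the SEW and LMUL fields of the vtype CSR. The sketch below round-trips both fields; the include path is an assumption (the helpers are declared in the RISC-V target support headers), and the chosen SEW/LMUL values are illustrative.

// NOTE: header path is assumed; RISCVVType/RISCVII are declared in the
// RISC-V target support headers used by this file.
#include "MCTargetDesc/RISCVBaseInfo.h"

using namespace llvm;

// Round-trip SEW=32 and LMUL=2 through the vtype field helpers.
static void vtypeFieldSketch() {
  unsigned EncodedSEW = RISCVVType::encodeSEW(32);   // log2(SEW) - 3
  unsigned SEW = RISCVVType::decodeVSEW(EncodedSEW); // back to 32

  auto LMul = RISCVVType::encodeLMUL(/*LMUL=*/2, /*Fractional=*/false);
  auto [Mul, Fractional] = RISCVVType::decodeVLMUL(LMul); // {2, false}
  (void)SEW; (void)Mul; (void)Fractional;
}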
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1509
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
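Several of the arithmetic helpers above (power-of-two tests, logarithms, ceiling division, sign extension) are simple enough to show with concrete values; the expected results are noted in comments.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

static void mathHelperSketch() {
  bool P2 = isPowerOf2_64(64);          // true
  unsigned L = Log2_64(64);             // 6
  uint64_t C = divideCeil(10, 4);       // 3
  uint64_t R = PowerOf2Ceil(17);        // 32
  int64_t S = SignExtend64(0xFFF, 12);  // -1: sign-extend the low 12 bits
  bool M = isMask_64(0x00FF);           // true: contiguous ones from bit 0
  (void)P2; (void)L; (void)C; (void)R; (void)S; (void)M;
}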
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
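A hedged sketch of a few of the KnownBits queries listed above (illustrative, standalone code):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

static void knownBitsSketch() {
  // An 8-bit value of the form 0b0000xx00: only bits 2 and 3 are unknown.
  llvm::KnownBits Known(8);
  Known.Zero = llvm::APInt(8, 0xF3);
  assert(!Known.isUnknown());
  assert(Known.countMaxTrailingZeros() >= 2);
  assert(Known.countMaxLeadingZeros() >= 4);

  // Zero extension preserves the known-zero high bits.
  llvm::KnownBits Wide = Known.zext(16);
  assert(Wide.getBitWidth() == 16);
  assert(Wide.countMaxActiveBits() <= 4);
}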
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
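A sketch combining the two MachinePointerInfo helpers listed above (illustrative; MF and FI are assumed to come from the caller):

#include "llvm/CodeGen/MachineMemOperand.h"

// Describes the location eight bytes past a fixed stack slot.
static llvm::MachinePointerInfo fixedStackPlus8(llvm::MachineFunction &MF,
                                                int FI) {
  llvm::MachinePointerInfo Base = llvm::MachinePointerInfo::getFixedStack(MF, FI);
  return Base.getWithOffset(8);
}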
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
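A short sketch of valueOrOne on a MaybeAlign (illustrative, standalone code):

#include "llvm/Support/Alignment.h"
#include <cassert>

static void maybeAlignSketch() {
  llvm::MaybeAlign Unset; // undefined alignment
  assert(Unset.valueOrOne() == llvm::Align(1));
  llvm::MaybeAlign EightBytes(llvm::Align(8));
  assert(EightBytes.valueOrOne().value() == 8);
}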
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)