1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
527 if (!Subtarget.hasStdExtZfa())
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa())
553 else
555 }
556
557 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
559
560 if (Subtarget.hasStdExtDOrZdinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
562
563 if (!Subtarget.is64Bit())
565
566 if (Subtarget.hasStdExtZfa()) {
567 setOperationAction(FPRndMode, MVT::f64, Legal);
569 } else {
570 if (Subtarget.is64Bit())
571 setOperationAction(FPRndMode, MVT::f64, Custom);
572
574 }
575
578 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
584 setOperationAction(FPOpToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
587 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
588 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
592 Subtarget.isSoftFPABI() ? LibCall : Custom);
595 }
596
597 if (Subtarget.is64Bit()) {
600 MVT::i32, Custom);
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
606 Custom);
607
610 XLenVT, Legal);
611
612 if (RV64LegalI32 && Subtarget.is64Bit())
615 MVT::i32, Legal);
616
619 }
620
623 XLenVT, Custom);
624
626
627 if (Subtarget.is64Bit())
629
630 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
631 // Unfortunately this can't be determined just from the ISA naming string.
633 Subtarget.is64Bit() ? Legal : Custom);
635 Subtarget.is64Bit() ? Legal : Custom);
636
639 if (Subtarget.is64Bit())
641
642 if (Subtarget.hasStdExtZicbop()) {
644 }
645
646 if (Subtarget.hasStdExtA()) {
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
650 else
652 } else if (Subtarget.hasForcedAtomics()) {
654 } else {
656 }
657
659
661
662 if (Subtarget.hasVInstructions()) {
664
666 if (RV64LegalI32 && Subtarget.is64Bit())
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
676 MVT::i32, Custom);
677 else
679 MVT::i64, Custom);
680
682 MVT::Other, Custom);
683
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT};
699
700 static const unsigned FloatingPointVPOps[] = {
701 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
702 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
703 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
704 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
705 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
706 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
707 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
708 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
709 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
710 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
711 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
712 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
713 ISD::EXPERIMENTAL_VP_SPLICE};
714
715 static const unsigned IntegerVecReduceOps[] = {
719
720 static const unsigned FloatingPointVecReduceOps[] = {
723
724 if (!Subtarget.is64Bit()) {
725 // We must custom-lower certain vXi64 operations on RV32 due to the vector
726 // element type being illegal.
728 MVT::i64, Custom);
729
730 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
731
732 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
733 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
734 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
735 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
736 MVT::i64, Custom);
737 }
738
739 for (MVT VT : BoolVecVTs) {
740 if (!isTypeLegal(VT))
741 continue;
742
744
745 // Mask VTs are custom-expanded into a series of standard nodes
749 VT, Custom);
750
752 Custom);
753
756 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
757 Expand);
758
759 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
760
763 Custom);
764
766 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
767 Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
777 VT, Custom);
779 Custom);
780
781 // Expand all extending loads to types larger than this, and truncating
782 // stores from types larger than this.
784 setTruncStoreAction(VT, OtherVT, Expand);
786 OtherVT, Expand);
787 }
788
789 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
790 ISD::VP_TRUNCATE, ISD::VP_SETCC},
791 VT, Custom);
792
795
797
798 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
799 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
800
803 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
804 }
805
806 for (MVT VT : IntVecVTs) {
807 if (!isTypeLegal(VT))
808 continue;
809
812
813 // Vectors implement MULHS/MULHU.
815
816 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
817 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
819
821 Legal);
822
824
825 // Custom-lower extensions and truncations from/to mask types.
827 VT, Custom);
828
829 // RVV has native int->float & float->int conversions where the
830 // element type sizes are within one power-of-two of each other. Any
831 // wider distances between type sizes have to be lowered as sequences
832 // which progressively narrow the gap in stages.
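      // For example (an illustrative sketch, not the only possible staging):
      // an nxv2i8 -> nxv2f64 conversion spans more than one power-of-two of
      // element size, so it is lowered as a chain such as
      //   nxv2i8 --extend--> nxv2i32 --widening convert--> nxv2f64
      // where each individual step is natively supported.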
837 VT, Custom);
839 Custom);
842 VT, Legal);
843
844 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
845 // nodes which truncate by one power of two at a time.
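      // For example, an nxv2i64 -> nxv2i8 truncate becomes a chain of
      // RISCVISD::TRUNCATE_VECTOR_VL nodes (a sketch):
      //   nxv2i64 -> nxv2i32 -> nxv2i16 -> nxv2i8
      // with each step typically selected as a narrowing shift (vnsrl) by 0.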
847
848 // Custom-lower insert/extract operations to simplify patterns.
850 Custom);
851
852 // Custom-lower reduction operations to set up the corresponding custom
853 // nodes' operands.
854 setOperationAction(IntegerVecReduceOps, VT, Custom);
855
856 setOperationAction(IntegerVPOps, VT, Custom);
857
859
861 VT, Custom);
862
864 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
865 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
866 VT, Custom);
867
870 VT, Custom);
871
874
876
878 setTruncStoreAction(VT, OtherVT, Expand);
880 OtherVT, Expand);
881 }
882
885
886 // Splice
888
889 if (Subtarget.hasStdExtZvkb()) {
891 setOperationAction(ISD::VP_BSWAP, VT, Custom);
892 } else {
893 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
895 }
896
897 if (Subtarget.hasStdExtZvbb()) {
899 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
900 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
901 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
902 VT, Custom);
903 } else {
904 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Expand);
909
910 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
911 // the range of f32.
912 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
913 if (isTypeLegal(FloatVT)) {
915 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
916 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
917 VT, Custom);
918 }
919 }
920 }
921
922 // Expand various CCs to best match the RVV ISA, which natively supports UNE
923 // but no other unordered comparisons, and supports all ordered comparisons
924 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
925 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
926 // and we pattern-match those back to the "original", swapping operands once
927 // more. This way we catch both operations and both "vf" and "fv" forms with
928 // fewer patterns.
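    // A concrete sketch: (setogt x, y) is expanded to the swapped
    // (setolt y, x); isel then recognises the swapped form and selects the
    // corresponding vmf* compare, so one set of patterns covers GT/GE and
    // both the "vf" and "fv" scalar-operand variants.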
929 static const ISD::CondCode VFPCCToExpand[] = {
933 };
934
935 // TODO: support more ops.
936 static const unsigned ZvfhminPromoteOps[] = {
944
945 // TODO: support more vp ops.
946 static const unsigned ZvfhminPromoteVPOps[] = {
947 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
948 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
949 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
950 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
951 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
952 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
953 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
954 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
955 ISD::VP_FMAXIMUM};
956
957 // Sets common operation actions on RVV floating-point vector types.
958 const auto SetCommonVFPActions = [&](MVT VT) {
960 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
961 // sizes are within one power-of-two of each other. Therefore conversions
962 // between vXf16 and vXf64 must be lowered as sequences which convert via
963 // vXf32.
966 // Custom-lower insert/extract operations to simplify patterns.
968 Custom);
969 // Expand various condition codes (explained above).
970 setCondCodeAction(VFPCCToExpand, VT, Expand);
971
974
978 VT, Custom);
979
980 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
981
982 // Expand FP operations that need libcalls.
994
996
998
1000 VT, Custom);
1001
1003 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1004 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1005 VT, Custom);
1006
1009
1012 VT, Custom);
1013
1016
1018
1019 setOperationAction(FloatingPointVPOps, VT, Custom);
1020
1022 Custom);
1025 VT, Legal);
1030 VT, Custom);
1031 };
1032
1033 // Sets common extload/truncstore actions on RVV floating-point vector
1034 // types.
1035 const auto SetCommonVFPExtLoadTruncStoreActions =
1036 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1037 for (auto SmallVT : SmallerVTs) {
1038 setTruncStoreAction(VT, SmallVT, Expand);
1039 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1040 }
1041 };
1042
1043 if (Subtarget.hasVInstructionsF16()) {
1044 for (MVT VT : F16VecVTs) {
1045 if (!isTypeLegal(VT))
1046 continue;
1047 SetCommonVFPActions(VT);
1048 }
1049 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1055 Custom);
1056 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1057 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1058 Custom);
1061 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1062 VT, Custom);
1065 VT, Custom);
1066 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1068 // load/store
1070
1071 // Custom split nxv32f16 since nxv32f32 is not legal.
1072 if (VT == MVT::nxv32f16) {
1073 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1074 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1075 continue;
1076 }
1077 // Add more promote ops.
1078 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1079 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1080 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1081 }
1082 }
1083
1084 if (Subtarget.hasVInstructionsF32()) {
1085 for (MVT VT : F32VecVTs) {
1086 if (!isTypeLegal(VT))
1087 continue;
1088 SetCommonVFPActions(VT);
1089 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1090 }
1091 }
1092
1093 if (Subtarget.hasVInstructionsF64()) {
1094 for (MVT VT : F64VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1097 SetCommonVFPActions(VT);
1098 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1099 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1100 }
1101 }
1102
1103 if (Subtarget.useRVVForFixedLengthVectors()) {
1105 if (!useRVVForFixedLengthVectorVT(VT))
1106 continue;
1107
1108 // By default everything must be expanded.
1109 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1112 setTruncStoreAction(VT, OtherVT, Expand);
1114 OtherVT, Expand);
1115 }
1116
1117 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1118 // expansion to a build_vector of 0s.
1120
1121 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1123 Custom);
1124
1126 Custom);
1127
1129 VT, Custom);
1130
1132
1134
1136
1138
1140
1142
1145 Custom);
1146
1148 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1149 Custom);
1150
1152 {
1161 },
1162 VT, Custom);
1164 Custom);
1165
1167
1168 // Operations below are different between masks and other vectors.
1169 if (VT.getVectorElementType() == MVT::i1) {
1170 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1171 ISD::OR, ISD::XOR},
1172 VT, Custom);
1173
1174 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1175 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1176 VT, Custom);
1177
1178 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1179 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1180 continue;
1181 }
1182
1183 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1184 // it before type legalization for i64 vectors on RV32. It will then be
1185 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1186 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1187 // improvements first.
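      // Illustrative flow (a sketch): a v2i64 splat of a 64-bit scalar on
      // RV32 is combined to SPLAT_VECTOR before type legalization; the
      // scalar is then split into lo/hi i32 halves as SPLAT_VECTOR_PARTS,
      // which the custom handling set up below knows how to lower.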
1188 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1191 }
1192
1195
1196 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1197 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1198 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1199 ISD::VP_SCATTER},
1200 VT, Custom);
1201
1205 VT, Custom);
1206
1209
1211
1212 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1213 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1215
1218 VT, Custom);
1219
1222
1225
1226 // Custom-lower reduction operations to set up the corresponding custom
1227 // nodes' operands.
1231 VT, Custom);
1232
1233 setOperationAction(IntegerVPOps, VT, Custom);
1234
1235 if (Subtarget.hasStdExtZvkb())
1237
1238 if (Subtarget.hasStdExtZvbb()) {
1241 VT, Custom);
1242 } else {
1243 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are in
1244 // the range of f32.
1245 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1246 if (isTypeLegal(FloatVT))
1249 Custom);
1250 }
1251 }
1252
1254 // There are no extending loads or truncating stores.
1255 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1256 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1257 setTruncStoreAction(VT, InnerVT, Expand);
1258 }
1259
1260 if (!useRVVForFixedLengthVectorVT(VT))
1261 continue;
1262
1263 // By default everything must be expanded.
1264 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1266
1267 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1268 // expansion to a build_vector of 0s.
1270
1271 if (VT.getVectorElementType() == MVT::f16 &&
1272 !Subtarget.hasVInstructionsF16()) {
1275 Custom);
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1278 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1279 Custom);
1281 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1282 VT, Custom);
1285 VT, Custom);
1288 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1289 // Don't promote f16 vector operations to f32 if f32 vector type is
1290 // not legal.
1291 // TODO: could split the f16 vector into two vectors and do promotion.
1292 if (!isTypeLegal(F32VecVT))
1293 continue;
1294 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1295 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1296 continue;
1297 }
1298
1299 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1301 Custom);
1302
1306 VT, Custom);
1307
1310 VT, Custom);
1311
1312 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1313 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1314 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1315 ISD::VP_SCATTER},
1316 VT, Custom);
1317
1322 VT, Custom);
1323
1325
1328 VT, Custom);
1329
1330 setCondCodeAction(VFPCCToExpand, VT, Expand);
1331
1335
1337
1338 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1339
1340 setOperationAction(FloatingPointVPOps, VT, Custom);
1341
1343 Custom);
1350 VT, Custom);
1351 }
1352
1353 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1354 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1355 Custom);
1356 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1358 if (Subtarget.hasStdExtFOrZfinx())
1360 if (Subtarget.hasStdExtDOrZdinx())
1362 }
1363 }
1364
1365 if (Subtarget.hasStdExtA()) {
1367 if (RV64LegalI32 && Subtarget.is64Bit())
1369 }
1370
1371 if (Subtarget.hasForcedAtomics()) {
1372 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1378 XLenVT, LibCall);
1379 }
1380
1381 if (Subtarget.hasVendorXTHeadMemIdx()) {
1382 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1383 setIndexedLoadAction(im, MVT::i8, Legal);
1384 setIndexedStoreAction(im, MVT::i8, Legal);
1385 setIndexedLoadAction(im, MVT::i16, Legal);
1386 setIndexedStoreAction(im, MVT::i16, Legal);
1387 setIndexedLoadAction(im, MVT::i32, Legal);
1388 setIndexedStoreAction(im, MVT::i32, Legal);
1389
1390 if (Subtarget.is64Bit()) {
1391 setIndexedLoadAction(im, MVT::i64, Legal);
1392 setIndexedStoreAction(im, MVT::i64, Legal);
1393 }
1394 }
1395 }
1396
1397 // Function alignments.
1398 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1399 setMinFunctionAlignment(FunctionAlignment);
1400 // Set preferred alignments.
1403
1407 if (Subtarget.is64Bit())
1409
1410 if (Subtarget.hasStdExtFOrZfinx())
1412
1413 if (Subtarget.hasStdExtZbb())
1415
1416 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1418
1419 if (Subtarget.hasStdExtZbkb())
1421 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1423 if (Subtarget.hasStdExtFOrZfinx())
1426 if (Subtarget.hasVInstructions())
1428 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1431 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1434 if (Subtarget.hasVendorXTHeadMemPair())
1436 if (Subtarget.useRVVForFixedLengthVectors())
1438
1439 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1440 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1441
1442 // Disable strict node mutation.
1443 IsStrictFPEnabled = true;
1444}
1445
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
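  // For instance, with RISCV::RVVBitsPerBlock == 64 this yields
  // MaxVF = (64 / 8) * 8 = 64, i.e. the VLMAX of i8 elements at LMUL=8 for
  // the minimum VLEN.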
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(VF);
1486}
1487
1488bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getModule()->getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(0)->getType();
1513 } else {
1514 // Use the return type. If it's a segment load, the return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1525 Info.flags |=
1527 return true;
1528 };
1529
1530 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1532
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1553 return true;
1554 case Intrinsic::riscv_masked_strided_load:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1556 /*IsUnitStrided*/ false);
1557 case Intrinsic::riscv_masked_strided_store:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_seg2_load:
1561 case Intrinsic::riscv_seg3_load:
1562 case Intrinsic::riscv_seg4_load:
1563 case Intrinsic::riscv_seg5_load:
1564 case Intrinsic::riscv_seg6_load:
1565 case Intrinsic::riscv_seg7_load:
1566 case Intrinsic::riscv_seg8_load:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1568 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1569 case Intrinsic::riscv_seg2_store:
1570 case Intrinsic::riscv_seg3_store:
1571 case Intrinsic::riscv_seg4_store:
1572 case Intrinsic::riscv_seg5_store:
1573 case Intrinsic::riscv_seg6_store:
1574 case Intrinsic::riscv_seg7_store:
1575 case Intrinsic::riscv_seg8_store:
1576 // Operands are (vec, ..., vec, ptr, vl)
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vle:
1581 case Intrinsic::riscv_vle_mask:
1582 case Intrinsic::riscv_vleff:
1583 case Intrinsic::riscv_vleff_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vse:
1589 case Intrinsic::riscv_vse_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ true,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vlse:
1595 case Intrinsic::riscv_vlse_mask:
1596 case Intrinsic::riscv_vloxei:
1597 case Intrinsic::riscv_vloxei_mask:
1598 case Intrinsic::riscv_vluxei:
1599 case Intrinsic::riscv_vluxei_mask:
1600 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1601 /*IsStore*/ false,
1602 /*IsUnitStrided*/ false);
1603 case Intrinsic::riscv_vsse:
1604 case Intrinsic::riscv_vsse_mask:
1605 case Intrinsic::riscv_vsoxei:
1606 case Intrinsic::riscv_vsoxei_mask:
1607 case Intrinsic::riscv_vsuxei:
1608 case Intrinsic::riscv_vsuxei_mask:
1609 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1610 /*IsStore*/ true,
1611 /*IsUnitStrided*/ false);
1612 case Intrinsic::riscv_vlseg2:
1613 case Intrinsic::riscv_vlseg3:
1614 case Intrinsic::riscv_vlseg4:
1615 case Intrinsic::riscv_vlseg5:
1616 case Intrinsic::riscv_vlseg6:
1617 case Intrinsic::riscv_vlseg7:
1618 case Intrinsic::riscv_vlseg8:
1619 case Intrinsic::riscv_vlseg2ff:
1620 case Intrinsic::riscv_vlseg3ff:
1621 case Intrinsic::riscv_vlseg4ff:
1622 case Intrinsic::riscv_vlseg5ff:
1623 case Intrinsic::riscv_vlseg6ff:
1624 case Intrinsic::riscv_vlseg7ff:
1625 case Intrinsic::riscv_vlseg8ff:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1629 case Intrinsic::riscv_vlseg2_mask:
1630 case Intrinsic::riscv_vlseg3_mask:
1631 case Intrinsic::riscv_vlseg4_mask:
1632 case Intrinsic::riscv_vlseg5_mask:
1633 case Intrinsic::riscv_vlseg6_mask:
1634 case Intrinsic::riscv_vlseg7_mask:
1635 case Intrinsic::riscv_vlseg8_mask:
1636 case Intrinsic::riscv_vlseg2ff_mask:
1637 case Intrinsic::riscv_vlseg3ff_mask:
1638 case Intrinsic::riscv_vlseg4ff_mask:
1639 case Intrinsic::riscv_vlseg5ff_mask:
1640 case Intrinsic::riscv_vlseg6ff_mask:
1641 case Intrinsic::riscv_vlseg7ff_mask:
1642 case Intrinsic::riscv_vlseg8ff_mask:
1643 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1644 /*IsStore*/ false,
1645 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1646 case Intrinsic::riscv_vlsseg2:
1647 case Intrinsic::riscv_vlsseg3:
1648 case Intrinsic::riscv_vlsseg4:
1649 case Intrinsic::riscv_vlsseg5:
1650 case Intrinsic::riscv_vlsseg6:
1651 case Intrinsic::riscv_vlsseg7:
1652 case Intrinsic::riscv_vlsseg8:
1653 case Intrinsic::riscv_vloxseg2:
1654 case Intrinsic::riscv_vloxseg3:
1655 case Intrinsic::riscv_vloxseg4:
1656 case Intrinsic::riscv_vloxseg5:
1657 case Intrinsic::riscv_vloxseg6:
1658 case Intrinsic::riscv_vloxseg7:
1659 case Intrinsic::riscv_vloxseg8:
1660 case Intrinsic::riscv_vluxseg2:
1661 case Intrinsic::riscv_vluxseg3:
1662 case Intrinsic::riscv_vluxseg4:
1663 case Intrinsic::riscv_vluxseg5:
1664 case Intrinsic::riscv_vluxseg6:
1665 case Intrinsic::riscv_vluxseg7:
1666 case Intrinsic::riscv_vluxseg8:
1667 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1668 /*IsStore*/ false,
1669 /*IsUnitStrided*/ false);
1670 case Intrinsic::riscv_vlsseg2_mask:
1671 case Intrinsic::riscv_vlsseg3_mask:
1672 case Intrinsic::riscv_vlsseg4_mask:
1673 case Intrinsic::riscv_vlsseg5_mask:
1674 case Intrinsic::riscv_vlsseg6_mask:
1675 case Intrinsic::riscv_vlsseg7_mask:
1676 case Intrinsic::riscv_vlsseg8_mask:
1677 case Intrinsic::riscv_vloxseg2_mask:
1678 case Intrinsic::riscv_vloxseg3_mask:
1679 case Intrinsic::riscv_vloxseg4_mask:
1680 case Intrinsic::riscv_vloxseg5_mask:
1681 case Intrinsic::riscv_vloxseg6_mask:
1682 case Intrinsic::riscv_vloxseg7_mask:
1683 case Intrinsic::riscv_vloxseg8_mask:
1684 case Intrinsic::riscv_vluxseg2_mask:
1685 case Intrinsic::riscv_vluxseg3_mask:
1686 case Intrinsic::riscv_vluxseg4_mask:
1687 case Intrinsic::riscv_vluxseg5_mask:
1688 case Intrinsic::riscv_vluxseg6_mask:
1689 case Intrinsic::riscv_vluxseg7_mask:
1690 case Intrinsic::riscv_vluxseg8_mask:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false);
1694 case Intrinsic::riscv_vsseg2:
1695 case Intrinsic::riscv_vsseg3:
1696 case Intrinsic::riscv_vsseg4:
1697 case Intrinsic::riscv_vsseg5:
1698 case Intrinsic::riscv_vsseg6:
1699 case Intrinsic::riscv_vsseg7:
1700 case Intrinsic::riscv_vsseg8:
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false);
1704 case Intrinsic::riscv_vsseg2_mask:
1705 case Intrinsic::riscv_vsseg3_mask:
1706 case Intrinsic::riscv_vsseg4_mask:
1707 case Intrinsic::riscv_vsseg5_mask:
1708 case Intrinsic::riscv_vsseg6_mask:
1709 case Intrinsic::riscv_vsseg7_mask:
1710 case Intrinsic::riscv_vsseg8_mask:
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1712 /*IsStore*/ true,
1713 /*IsUnitStrided*/ false);
1714 case Intrinsic::riscv_vssseg2:
1715 case Intrinsic::riscv_vssseg3:
1716 case Intrinsic::riscv_vssseg4:
1717 case Intrinsic::riscv_vssseg5:
1718 case Intrinsic::riscv_vssseg6:
1719 case Intrinsic::riscv_vssseg7:
1720 case Intrinsic::riscv_vssseg8:
1721 case Intrinsic::riscv_vsoxseg2:
1722 case Intrinsic::riscv_vsoxseg3:
1723 case Intrinsic::riscv_vsoxseg4:
1724 case Intrinsic::riscv_vsoxseg5:
1725 case Intrinsic::riscv_vsoxseg6:
1726 case Intrinsic::riscv_vsoxseg7:
1727 case Intrinsic::riscv_vsoxseg8:
1728 case Intrinsic::riscv_vsuxseg2:
1729 case Intrinsic::riscv_vsuxseg3:
1730 case Intrinsic::riscv_vsuxseg4:
1731 case Intrinsic::riscv_vsuxseg5:
1732 case Intrinsic::riscv_vsuxseg6:
1733 case Intrinsic::riscv_vsuxseg7:
1734 case Intrinsic::riscv_vsuxseg8:
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ true,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vssseg2_mask:
1739 case Intrinsic::riscv_vssseg3_mask:
1740 case Intrinsic::riscv_vssseg4_mask:
1741 case Intrinsic::riscv_vssseg5_mask:
1742 case Intrinsic::riscv_vssseg6_mask:
1743 case Intrinsic::riscv_vssseg7_mask:
1744 case Intrinsic::riscv_vssseg8_mask:
1745 case Intrinsic::riscv_vsoxseg2_mask:
1746 case Intrinsic::riscv_vsoxseg3_mask:
1747 case Intrinsic::riscv_vsoxseg4_mask:
1748 case Intrinsic::riscv_vsoxseg5_mask:
1749 case Intrinsic::riscv_vsoxseg6_mask:
1750 case Intrinsic::riscv_vsoxseg7_mask:
1751 case Intrinsic::riscv_vsoxseg8_mask:
1752 case Intrinsic::riscv_vsuxseg2_mask:
1753 case Intrinsic::riscv_vsuxseg3_mask:
1754 case Intrinsic::riscv_vsuxseg4_mask:
1755 case Intrinsic::riscv_vsuxseg5_mask:
1756 case Intrinsic::riscv_vsuxseg6_mask:
1757 case Intrinsic::riscv_vsuxseg7_mask:
1758 case Intrinsic::riscv_vsuxseg8_mask:
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1760 /*IsStore*/ true,
1761 /*IsUnitStrided*/ false);
1762 }
1763}
1764
1766 const AddrMode &AM, Type *Ty,
1767 unsigned AS,
1768 Instruction *I) const {
1769 // No global is ever allowed as a base.
1770 if (AM.BaseGV)
1771 return false;
1772
1773 // RVV instructions only support register addressing.
1774 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1775 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1776
1777 // Require a 12-bit signed offset.
1778 if (!isInt<12>(AM.BaseOffs))
1779 return false;
1780
1781 switch (AM.Scale) {
1782 case 0: // "r+i" or just "i", depending on HasBaseReg.
1783 break;
1784 case 1:
1785 if (!AM.HasBaseReg) // allow "r+i".
1786 break;
1787 return false; // disallow "r+r" or "r+r+i".
1788 default:
1789 return false;
1790 }
1791
1792 return true;
1793}
1794
1796 return isInt<12>(Imm);
1797}
1798
1800 return isInt<12>(Imm);
1801}
1802
1803// On RV32, 64-bit integers are split into their high and low parts and held
1804// in two different registers, so the trunc is free since the low register can
1805// just be used.
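// For example (a sketch): an i64 value on RV32 lives in a {lo, hi} register
// pair, so truncating it to i32 is just "use the lo register" and costs no
// instructions.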
1806// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1807// isTruncateFree?
1809 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1810 return false;
1811 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1812 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1813 return (SrcBits == 64 && DestBits == 32);
1814}
1815
1817 // We consider i64->i32 free on RV64 since we have good selection of W
1818 // instructions that make promoting operations back to i64 free in many cases.
1819 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1820 !DstVT.isInteger())
1821 return false;
1822 unsigned SrcBits = SrcVT.getSizeInBits();
1823 unsigned DestBits = DstVT.getSizeInBits();
1824 return (SrcBits == 64 && DestBits == 32);
1825}
1826
1828 // Zexts are free if they can be combined with a load.
1829 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1830 // poorly with type legalization of compares preferring sext.
1831 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1832 EVT MemVT = LD->getMemoryVT();
1833 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1834 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1835 LD->getExtensionType() == ISD::ZEXTLOAD))
1836 return true;
1837 }
1838
1839 return TargetLowering::isZExtFree(Val, VT2);
1840}
1841
1843 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1844}
1845
1847 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1848}
1849
1851 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1852}
1853
1855 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1856 Subtarget.hasVendorXCVbitmanip();
1857}
1858
1860 const Instruction &AndI) const {
1861 // We expect to be able to match a bit extraction instruction if the Zbs
1862 // extension is supported and the mask is a power of two. However, we
1863 // conservatively return false if the mask would fit in an ANDI instruction,
1864 // on the basis that it's possible the sinking+duplication of the AND in
1865 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1866 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1867 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1868 return false;
1869 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1870 if (!Mask)
1871 return false;
1872 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1873}
1874
1876 EVT VT = Y.getValueType();
1877
1878 // FIXME: Support vectors once we have tests.
1879 if (VT.isVector())
1880 return false;
1881
1882 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1883 !isa<ConstantSDNode>(Y);
1884}
1885
1887 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
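  // Roughly (a sketch of the expected selection): a test such as
  //   ((x >> y) & 1) != 0
  // becomes `bext t, x, y` followed by `snez`, instead of a shift plus an
  // AND and a compare.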
1888 if (Subtarget.hasStdExtZbs())
1889 return X.getValueType().isScalarInteger();
1890 auto *C = dyn_cast<ConstantSDNode>(Y);
1891 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1892 if (Subtarget.hasVendorXTHeadBs())
1893 return C != nullptr;
1894 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1895 return C && C->getAPIntValue().ule(10);
1896}
1897
1899 EVT VT) const {
1900 // Only enable for rvv.
1901 if (!VT.isVector() || !Subtarget.hasVInstructions())
1902 return false;
1903
1904 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1905 return false;
1906
1907 return true;
1908}
1909
1911 Type *Ty) const {
1912 assert(Ty->isIntegerTy());
1913
1914 unsigned BitSize = Ty->getIntegerBitWidth();
1915 if (BitSize > Subtarget.getXLen())
1916 return false;
1917
1918 // Fast path, assume 32-bit immediates are cheap.
1919 int64_t Val = Imm.getSExtValue();
1920 if (isInt<32>(Val))
1921 return true;
1922
1923 // A constant pool entry may be more aligned than the load we're trying to
1924 // replace. If we don't support unaligned scalar mem, prefer the constant
1925 // pool.
1926 // TODO: Can the caller pass down the alignment?
1927 if (!Subtarget.enableUnalignedScalarMem())
1928 return true;
1929
1930 // Prefer to keep the load if it would require many instructions.
1931 // This uses the same threshold we use for constant pools but doesn't
1932 // check useConstantPoolForLargeInts.
1933 // TODO: Should we keep the load only when we're definitely going to emit a
1934 // constant pool?
1935
1937 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1938}
1939
1943 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1944 SelectionDAG &DAG) const {
1945 // One interesting pattern that we'd want to form is 'bit extract':
1946 // ((1 >> Y) & 1) ==/!= 0
1947 // But we also need to be careful not to try to reverse that fold.
1948
1949 // Is this '((1 >> Y) & 1)'?
1950 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1951 return false; // Keep the 'bit extract' pattern.
1952
1953 // Will this be '((1 >> Y) & 1)' after the transform?
1954 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1955 return true; // Do form the 'bit extract' pattern.
1956
1957 // If 'X' is a constant, and we transform, then we will immediately
1958 // try to undo the fold, thus causing endless combine loop.
1959 // So only do the transform if X is not a constant. This matches the default
1960 // implementation of this function.
1961 return !XC;
1962}
1963
1964bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1965 switch (Opcode) {
1966 case Instruction::Add:
1967 case Instruction::Sub:
1968 case Instruction::Mul:
1969 case Instruction::And:
1970 case Instruction::Or:
1971 case Instruction::Xor:
1972 case Instruction::FAdd:
1973 case Instruction::FSub:
1974 case Instruction::FMul:
1975 case Instruction::FDiv:
1976 case Instruction::ICmp:
1977 case Instruction::FCmp:
1978 return true;
1979 case Instruction::Shl:
1980 case Instruction::LShr:
1981 case Instruction::AShr:
1982 case Instruction::UDiv:
1983 case Instruction::SDiv:
1984 case Instruction::URem:
1985 case Instruction::SRem:
1986 return Operand == 1;
1987 default:
1988 return false;
1989 }
1990}
1991
1992
1994 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1995 return false;
1996
1997 if (canSplatOperand(I->getOpcode(), Operand))
1998 return true;
1999
2000 auto *II = dyn_cast<IntrinsicInst>(I);
2001 if (!II)
2002 return false;
2003
2004 switch (II->getIntrinsicID()) {
2005 case Intrinsic::fma:
2006 case Intrinsic::vp_fma:
2007 return Operand == 0 || Operand == 1;
2008 case Intrinsic::vp_shl:
2009 case Intrinsic::vp_lshr:
2010 case Intrinsic::vp_ashr:
2011 case Intrinsic::vp_udiv:
2012 case Intrinsic::vp_sdiv:
2013 case Intrinsic::vp_urem:
2014 case Intrinsic::vp_srem:
2015 case Intrinsic::ssub_sat:
2016 case Intrinsic::vp_ssub_sat:
2017 case Intrinsic::usub_sat:
2018 case Intrinsic::vp_usub_sat:
2019 return Operand == 1;
2020 // These intrinsics are commutative.
2021 case Intrinsic::vp_add:
2022 case Intrinsic::vp_mul:
2023 case Intrinsic::vp_and:
2024 case Intrinsic::vp_or:
2025 case Intrinsic::vp_xor:
2026 case Intrinsic::vp_fadd:
2027 case Intrinsic::vp_fmul:
2028 case Intrinsic::vp_icmp:
2029 case Intrinsic::vp_fcmp:
2030 case Intrinsic::smin:
2031 case Intrinsic::vp_smin:
2032 case Intrinsic::umin:
2033 case Intrinsic::vp_umin:
2034 case Intrinsic::smax:
2035 case Intrinsic::vp_smax:
2036 case Intrinsic::umax:
2037 case Intrinsic::vp_umax:
2038 case Intrinsic::sadd_sat:
2039 case Intrinsic::vp_sadd_sat:
2040 case Intrinsic::uadd_sat:
2041 case Intrinsic::vp_uadd_sat:
2042 // These intrinsics have 'vr' versions.
2043 case Intrinsic::vp_sub:
2044 case Intrinsic::vp_fsub:
2045 case Intrinsic::vp_fdiv:
2046 return Operand == 0 || Operand == 1;
2047 default:
2048 return false;
2049 }
2050}
2051
2052/// Check if sinking \p I's operands to I's basic block is profitable, because
2053/// the operands can be folded into a target instruction, e.g.
2054/// splats of scalars can fold into vector instructions.
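/// A sketch of the motivating IR shape (names are illustrative):
///   %head  = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
///   %splat = shufflevector <vscale x 4 x i32> %head,
///                          <vscale x 4 x i32> poison,
///                          <vscale x 4 x i32> zeroinitializer
///   %sum   = add <vscale x 4 x i32> %v, %splat
/// Sinking the splat next to the add lets isel fold it into vadd.vx.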
2056 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2057 using namespace llvm::PatternMatch;
2058
2059 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2060 return false;
2061
2062 // Don't sink splat operands if the target prefers it. Some targets require
2063 // S2V transfer buffers and we can run out of them copying the same value
2064 // repeatedly.
2065 // FIXME: It could still be worth doing if it would improve vector register
2066 // pressure and prevent a vector spill.
2067 if (!Subtarget.sinkSplatOperands())
2068 return false;
2069
2070 for (auto OpIdx : enumerate(I->operands())) {
2071 if (!canSplatOperand(I, OpIdx.index()))
2072 continue;
2073
2074 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2075 // Make sure we are not already sinking this operand
2076 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2077 continue;
2078
2079 // We are looking for a splat that can be sunk.
2081 m_Undef(), m_ZeroMask())))
2082 continue;
2083
2084 // Don't sink i1 splats.
2085 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2086 continue;
2087
2088 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2089 // and vector registers.
2090 for (Use &U : Op->uses()) {
2091 Instruction *Insn = cast<Instruction>(U.getUser());
2092 if (!canSplatOperand(Insn, U.getOperandNo()))
2093 return false;
2094 }
2095
2096 Ops.push_back(&Op->getOperandUse(0));
2097 Ops.push_back(&OpIdx.value());
2098 }
2099 return true;
2100}
2101
2103 unsigned Opc = VecOp.getOpcode();
2104
2105 // Assume target opcodes can't be scalarized.
2106 // TODO - do we have any exceptions?
2107 if (Opc >= ISD::BUILTIN_OP_END)
2108 return false;
2109
2110 // If the vector op is not supported, try to convert to scalar.
2111 EVT VecVT = VecOp.getValueType();
2112 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2113 return true;
2114
2115 // If the vector op is supported, but the scalar op is not, the transform may
2116 // not be worthwhile.
2117 // Permit a vector binary operation to be converted to a scalar binary
2118 // operation which is custom lowered with an illegal type.
2119 EVT ScalarVT = VecVT.getScalarType();
2120 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2121 isOperationCustom(Opc, ScalarVT);
2122}
2123
2125 const GlobalAddressSDNode *GA) const {
2126 // In order to maximise the opportunity for common subexpression elimination,
2127 // keep a separate ADD node for the global address offset instead of folding
2128 // it in the global address node. Later peephole optimisations may choose to
2129 // fold it back in when profitable.
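  // For example (a sketch): for an access at @g + 8 we keep
  // (add (GlobalAddress @g), 8) rather than (GlobalAddress @g, offset 8), so
  // a nearby access at @g + 12 can reuse the materialised address of @g and
  // differ only in the cheap ADD.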
2130 return false;
2131}
2132
2133// Return one of the followings:
2134// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2135// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2136// positive counterpart, which will be materialized from the first returned
2137 // element. The second returned element indicates that an FNEG should
2138 // follow.
2139// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2140std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2141 EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return std::make_pair(-1, false);
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return std::make_pair(-1, false);
2157
2159 if (Index < 0 && Imm.isNegative())
2160 // Try the combination of its positive counterpart + FNEG.
2161 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2162 else
2163 return std::make_pair(Index, false);
2164}
2165
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2182 return true;
2183
2184 // Cannot create a 64 bit floating-point immediate value for rv32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence.
2197 const int Cost =
2198 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2199 Subtarget);
2200 return Cost <= FPImmCost;
2201}
2202
2203// TODO: This is very conservative.
2205 unsigned Index) const {
2207 return false;
2208
2209 // Only support extracting a fixed vector from a fixed vector for now.
2210 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2211 return false;
2212
2213 EVT EltVT = ResVT.getVectorElementType();
2214 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2215
2216 // The smallest type we can slide is i8.
2217 // TODO: We can extract index 0 from a mask vector without a slide.
2218 if (EltVT == MVT::i1)
2219 return false;
2220
2221 unsigned ResElts = ResVT.getVectorNumElements();
2222 unsigned SrcElts = SrcVT.getVectorNumElements();
2223
2224 unsigned MinVLen = Subtarget.getRealMinVLen();
2225 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2226
2227 // If we're extracting only data from the first VLEN bits of the source
2228 // then we can always do this with an m1 vslidedown.vx. Restricting the
2229 // Index ensures we can use a vslidedown.vi.
2230 // TODO: We can generalize this when the exact VLEN is known.
2231 if (Index + ResElts <= MinVLMAX && Index < 31)
2232 return true;
2233
2234 // Conservatively only handle extracting half of a vector.
2235 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2236 // a cheap extract. However, this case is important in practice for
2237 // shuffled extracts of longer vectors. How should we resolve this?
2238 if ((ResElts * 2) != SrcElts)
2239 return false;
2240
2241 // A slide can support an arbitrary index, but we only treat vslidedown.vi
2242 // as cheap.
2243 if (Index >= 32)
2244 return false;
2245
2246 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2247 // the upper half of a vector until we have more test coverage.
2248 return Index == 0 || Index == ResElts;
2249}
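// For example, with a minimum VLEN of 128, extracting v4i32 from v8i32 is
// treated as cheap at element index 0 or 4 (the whole low or high half,
// reachable with a vslidedown.vi), but not at index 2.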
2250
2253 EVT VT) const {
2254 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2255 // We might still end up using a GPR but that will be decided based on ABI.
2256 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2257 !Subtarget.hasStdExtZfhminOrZhinxmin())
2258 return MVT::f32;
2259
2261
2262 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2263 return MVT::i64;
2264
2265 return PartVT;
2266}
2267
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2278}
2279
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2287 IntermediateVT = MVT::i64;
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2290 RegisterVT = MVT::i64;
2291
2292 return NumRegs;
2293}
2294
2295// Changes the condition code and swaps operands if necessary, so the SetCC
2296// operation matches one of the comparisons supported directly by branches
2297// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2298// with 1/-1.
2299static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2300 ISD::CondCode &CC, SelectionDAG &DAG) {
2301 // If this is a single bit test that can't be handled by ANDI, shift the
2302 // bit to be tested to the MSB and perform a signed compare with 0.
2303 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2304 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2305 isa<ConstantSDNode>(LHS.getOperand(1))) {
2306 uint64_t Mask = LHS.getConstantOperandVal(1);
2307 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2308 unsigned ShAmt = 0;
2309 if (isPowerOf2_64(Mask)) {
2310 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2311 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2312 } else {
2313 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2314 }
2315
2316 LHS = LHS.getOperand(0);
2317 if (ShAmt != 0)
2318 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2319 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2320 return;
2321 }
2322 }
2323
2324 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2325 int64_t C = RHSC->getSExtValue();
2326 switch (CC) {
2327 default: break;
2328 case ISD::SETGT:
2329 // Convert X > -1 to X >= 0.
2330 if (C == -1) {
2331 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2332 CC = ISD::SETGE;
2333 return;
2334 }
2335 break;
2336 case ISD::SETLT:
2337 // Convert X < 1 to 0 >= X.
2338 if (C == 1) {
2339 RHS = LHS;
2340 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2341 CC = ISD::SETGE;
2342 return;
2343 }
2344 break;
2345 }
2346 }
2347
2348 switch (CC) {
2349 default:
2350 break;
2351 case ISD::SETGT:
2352 case ISD::SETLE:
2353 case ISD::SETUGT:
2354 case ISD::SETULE:
2356 std::swap(LHS, RHS);
2357 break;
2358 }
2359}
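// Worked example of the single-bit-test path above on RV64: for
// (X & 0x800) == 0 the mask bit 11 cannot be tested with andi (0x800 is not
// a signed 12-bit immediate), so the bit is shifted up to the MSB
// (ShAmt = 64 - 1 - 11 = 52) and the branch compares (X << 52) against 0
// with a signed condition.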
2360
2362 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2363 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2364 if (VT.getVectorElementType() == MVT::i1)
2365 KnownSize *= 8;
2366
2367 switch (KnownSize) {
2368 default:
2369 llvm_unreachable("Invalid LMUL.");
2370 case 8:
2372 case 16:
2374 case 32:
2376 case 64:
2378 case 128:
2380 case 256:
2382 case 512:
2384 }
2385}
2386
2388 switch (LMul) {
2389 default:
2390 llvm_unreachable("Invalid LMUL.");
2395 return RISCV::VRRegClassID;
2397 return RISCV::VRM2RegClassID;
2399 return RISCV::VRM4RegClassID;
2401 return RISCV::VRM8RegClassID;
2402 }
2403}
2404
2406 RISCVII::VLMUL LMUL = getLMUL(VT);
2407 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2408 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2409 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2410 LMUL == RISCVII::VLMUL::LMUL_1) {
2411 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2412 "Unexpected subreg numbering");
2413 return RISCV::sub_vrm1_0 + Index;
2414 }
2415 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2416 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2417 "Unexpected subreg numbering");
2418 return RISCV::sub_vrm2_0 + Index;
2419 }
2420 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2421 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm4_0 + Index;
2424 }
2425 llvm_unreachable("Invalid vector type.");
2426}
2427
2429 if (VT.getVectorElementType() == MVT::i1)
2430 return RISCV::VRRegClassID;
2431 return getRegClassIDForLMUL(getLMUL(VT));
2432}
2433
2434// Attempt to decompose a subvector insert/extract between VecVT and
2435// SubVecVT via subregister indices. Returns the subregister index that
2436// can perform the subvector insert/extract with the given element index, as
2437// well as the index corresponding to any leftover subvectors that must be
2438// further inserted/extracted within the register class for SubVecVT.
2439std::pair<unsigned, unsigned>
2441 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2442 const RISCVRegisterInfo *TRI) {
2443 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2444 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2445 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2446 "Register classes not ordered");
2447 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2448 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2449 // Try to compose a subregister index that takes us from the incoming
2450 // LMUL>1 register class down to the outgoing one. At each step we halve
2451 // the LMUL:
2452 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2453 // Note that this is not guaranteed to find a subregister index, such as
2454 // when we are extracting from one VR type to another.
2455 unsigned SubRegIdx = RISCV::NoSubRegister;
2456 for (const unsigned RCID :
2457 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2458 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2459 VecVT = VecVT.getHalfNumVectorElementsVT();
2460 bool IsHi =
2461 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2462 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2463 getSubregIndexByMVT(VecVT, IsHi));
2464 if (IsHi)
2465 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2466 }
2467 return {SubRegIdx, InsertExtractIdx};
2468}
2469
2470// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2471// stores for those types.
2472bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2473 return !Subtarget.useRVVForFixedLengthVectors() ||
2474 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2475}
2476
2478 if (!ScalarTy.isSimple())
2479 return false;
2480 switch (ScalarTy.getSimpleVT().SimpleTy) {
2481 case MVT::iPTR:
2482 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2483 case MVT::i8:
2484 case MVT::i16:
2485 case MVT::i32:
2486 return true;
2487 case MVT::i64:
2488 return Subtarget.hasVInstructionsI64();
2489 case MVT::f16:
2490 return Subtarget.hasVInstructionsF16();
2491 case MVT::f32:
2492 return Subtarget.hasVInstructionsF32();
2493 case MVT::f64:
2494 return Subtarget.hasVInstructionsF64();
2495 default:
2496 return false;
2497 }
2498}
2499
2500
2501unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2502 return NumRepeatedDivisors;
2503}
2504
2506 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2507 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2508 "Unexpected opcode");
2509 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2510 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2512 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2513 if (!II)
2514 return SDValue();
2515 return Op.getOperand(II->VLOperand + 1 + HasChain);
2516}
2517
2519 const RISCVSubtarget &Subtarget) {
2520 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2521 if (!Subtarget.useRVVForFixedLengthVectors())
2522 return false;
2523
2524 // We only support a set of vector types with a consistent maximum fixed size
2525 // across all supported vector element types to avoid legalization issues.
2526 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2527 // fixed-length vector type we support is 1024 bytes.
2528 if (VT.getFixedSizeInBits() > 1024 * 8)
2529 return false;
2530
2531 unsigned MinVLen = Subtarget.getRealMinVLen();
2532
2533 MVT EltVT = VT.getVectorElementType();
2534
2535 // Don't use RVV for vectors we cannot scalarize if required.
2536 switch (EltVT.SimpleTy) {
2537 // i1 is supported but has different rules.
2538 default:
2539 return false;
2540 case MVT::i1:
2541 // Masks can only use a single register.
2542 if (VT.getVectorNumElements() > MinVLen)
2543 return false;
2544 MinVLen /= 8;
2545 break;
2546 case MVT::i8:
2547 case MVT::i16:
2548 case MVT::i32:
2549 break;
2550 case MVT::i64:
2551 if (!Subtarget.hasVInstructionsI64())
2552 return false;
2553 break;
2554 case MVT::f16:
2555 if (!Subtarget.hasVInstructionsF16Minimal())
2556 return false;
2557 break;
2558 case MVT::f32:
2559 if (!Subtarget.hasVInstructionsF32())
2560 return false;
2561 break;
2562 case MVT::f64:
2563 if (!Subtarget.hasVInstructionsF64())
2564 return false;
2565 break;
2566 }
2567
2568 // Reject elements larger than ELEN.
2569 if (EltVT.getSizeInBits() > Subtarget.getELen())
2570 return false;
2571
2572 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2573 // Don't use RVV for types that don't fit.
2574 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2575 return false;
2576
2577 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2578 // the base fixed length RVV support in place.
2579 if (!VT.isPow2VectorType())
2580 return false;
2581
2582 return true;
2583}
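// For example (assuming the default cap of LMUL 8 for fixed-length vectors):
// with a minimum VLEN of 128, v16i32 (512 bits) maps to LMUL 4 and is
// accepted, while v64i32 (2048 bits) would require LMUL 16 and is rejected.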
2584
2585bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2586 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2587}
2588
2589// Return the largest legal scalable vector type that matches VT's element type.
2591 const RISCVSubtarget &Subtarget) {
2592 // This may be called before legal types are setup.
2593 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2594 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2595 "Expected legal fixed length vector!");
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598 unsigned MaxELen = Subtarget.getELen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601 switch (EltVT.SimpleTy) {
2602 default:
2603 llvm_unreachable("unexpected element type for RVV container");
2604 case MVT::i1:
2605 case MVT::i8:
2606 case MVT::i16:
2607 case MVT::i32:
2608 case MVT::i64:
2609 case MVT::f16:
2610 case MVT::f32:
2611 case MVT::f64: {
2612 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2613 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2614 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2615 unsigned NumElts =
2616 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2617 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2618 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2619 return MVT::getScalableVectorVT(EltVT, NumElts);
2620 }
2621 }
2622}
2623
2625 const RISCVSubtarget &Subtarget) {
2627 Subtarget);
2628}
2629
2631 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2632}
2633
2634// Grow V to consume an entire RVV register.
2636 const RISCVSubtarget &Subtarget) {
2637 assert(VT.isScalableVector() &&
2638 "Expected to convert into a scalable vector!");
2639 assert(V.getValueType().isFixedLengthVector() &&
2640 "Expected a fixed length vector operand!");
2641 SDLoc DL(V);
2642 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2643 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2644}
2645
2646// Shrink V so it's just big enough to maintain a VT's worth of data.
2648 const RISCVSubtarget &Subtarget) {
2650 "Expected to convert into a fixed length vector!");
2651 assert(V.getValueType().isScalableVector() &&
2652 "Expected a scalable vector operand!");
2653 SDLoc DL(V);
2654 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2655 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2656}
2657
2658/// Return the mask type suitable for masking the provided vector type.
2659/// This is simply an i1 element type vector of the same
2660/// (possibly scalable) length.
2661static MVT getMaskTypeFor(MVT VecVT) {
2662 assert(VecVT.isVector());
2664 return MVT::getVectorVT(MVT::i1, EC);
2665}
2666
2667/// Creates an all ones mask suitable for masking a vector of type VecTy with
2668/// vector length VL.
2669static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2670 SelectionDAG &DAG) {
2671 MVT MaskVT = getMaskTypeFor(VecVT);
2672 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2673}
2674
2675static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2677 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2678 // canonicalize the representation. InsertVSETVLI will pick the immediate
2679 // encoding later if profitable.
2680 const auto [MinVLMAX, MaxVLMAX] =
2681 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2682 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2683 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2684
2685 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2686}
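// For instance, if the exact VLEN is known to be 128 and ContainerVT is
// nxv2i32 (so VLMAX == 4), a request for 4 elements is emitted as the X0
// register (meaning VLMAX) rather than the constant 4, letting InsertVSETVLI
// choose the cheaper encoding later.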
2687
2688static std::pair<SDValue, SDValue>
2690 const RISCVSubtarget &Subtarget) {
2691 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2692 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2693 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2694 return {Mask, VL};
2695}
2696
2697static std::pair<SDValue, SDValue>
2698getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2699 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2700 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2701 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2702 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2703 return {Mask, VL};
2704}
2705
2706// Gets the two common "VL" operands: an all-ones mask and the vector length.
2707// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2708// the vector type that the fixed-length vector is contained in. Otherwise if
2709// VecVT is scalable, then ContainerVT should be the same as VecVT.
2710static std::pair<SDValue, SDValue>
2711getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2712 const RISCVSubtarget &Subtarget) {
2713 if (VecVT.isFixedLengthVector())
2714 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2715 Subtarget);
2716 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2717 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2718}
2719
2721 SelectionDAG &DAG) const {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2724 VecVT.getVectorElementCount());
2725}
2726
2727std::pair<unsigned, unsigned>
2729 const RISCVSubtarget &Subtarget) {
2730 assert(VecVT.isScalableVector() && "Expected scalable vector");
2731
2732 unsigned EltSize = VecVT.getScalarSizeInBits();
2733 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2734
2735 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2736 unsigned MaxVLMAX =
2737 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2738
2739 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2740 unsigned MinVLMAX =
2741 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2742
2743 return std::make_pair(MinVLMAX, MaxVLMAX);
2744}
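// As a concrete example, for nxv2i32 (known minimum size 64 bits) on a
// subtarget with VLEN bounded between 128 and 256, EltSize is 32 and MinSize
// is 64, giving a {MinVLMAX, MaxVLMAX} of {4, 8}.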
2745
2746// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2747// of either are (currently) supported. This can get us into an infinite loop
2748// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2749// as a ..., etc.
2750// Until either (or both) of these can reliably lower any node, reporting that
2751// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2752// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2753// which is not desirable.
2755 EVT VT, unsigned DefinedValues) const {
2756 return false;
2757}
2758
2760 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2761 // implementation-defined.
2762 if (!VT.isVector())
2764 unsigned DLenFactor = Subtarget.getDLenFactor();
2765 unsigned Cost;
2766 if (VT.isScalableVector()) {
2767 unsigned LMul;
2768 bool Fractional;
2769 std::tie(LMul, Fractional) =
2771 if (Fractional)
2772 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2773 else
2774 Cost = (LMul * DLenFactor);
2775 } else {
2776 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2777 }
2778 return Cost;
2779}
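// Rough illustration of the estimate above: an LMUL=4 scalable type costs 4
// when DLEN == VLEN (DLenFactor 1) and 8 when DLEN == VLEN/2 (DLenFactor 2),
// while fractional-LMUL types end up with a cost of 1 for either DLenFactor.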
2780
2781
2782/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2783/// is generally quadratic in the number of vregs implied by LMUL. Note that
2784/// operands (index and possibly mask) are handled separately.
2786 return getLMULCost(VT) * getLMULCost(VT);
2787}
2788
2789/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2790/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2801 return getLMULCost(VT);
2802}
2803
2804/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2805/// for the type VT. (This does not cover the vslide1up or vslide1down
2806/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2807/// or may track the vrgather.vv cost. It is implementation-dependent.
2809 return getLMULCost(VT);
2810}
2811
2813 const RISCVSubtarget &Subtarget) {
2814 // RISC-V FP-to-int conversions saturate to the destination register size, but
2815 // don't produce 0 for nan. We can use a conversion instruction and fix the
2816 // nan case with a compare and a select.
2817 SDValue Src = Op.getOperand(0);
2818
2819 MVT DstVT = Op.getSimpleValueType();
2820 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2821
2822 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2823
2824 if (!DstVT.isVector()) {
2825 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2826 // the result.
2827 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2828 Src.getValueType() == MVT::bf16) {
2829 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2830 }
2831
2832 unsigned Opc;
2833 if (SatVT == DstVT)
2834 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2835 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2837 else
2838 return SDValue();
2839 // FIXME: Support other SatVTs by clamping before or after the conversion.
2840
2841 SDLoc DL(Op);
2842 SDValue FpToInt = DAG.getNode(
2843 Opc, DL, DstVT, Src,
2845
2846 if (Opc == RISCVISD::FCVT_WU_RV64)
2847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2848
2849 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2850 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2852 }
2853
2854 // Vectors.
2855
2856 MVT DstEltVT = DstVT.getVectorElementType();
2857 MVT SrcVT = Src.getSimpleValueType();
2858 MVT SrcEltVT = SrcVT.getVectorElementType();
2859 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2860 unsigned DstEltSize = DstEltVT.getSizeInBits();
2861
2862 // Only handle saturating to the destination type.
2863 if (SatVT != DstEltVT)
2864 return SDValue();
2865
2866 // FIXME: Don't support narrowing by more than 1 step for now.
2867 if (SrcEltSize > (2 * DstEltSize))
2868 return SDValue();
2869
2870 MVT DstContainerVT = DstVT;
2871 MVT SrcContainerVT = SrcVT;
2872 if (DstVT.isFixedLengthVector()) {
2873 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2874 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2875 assert(DstContainerVT.getVectorElementCount() ==
2876 SrcContainerVT.getVectorElementCount() &&
2877 "Expected same element count");
2878 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2879 }
2880
2881 SDLoc DL(Op);
2882
2883 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2884
2885 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2886 {Src, Src, DAG.getCondCode(ISD::SETNE),
2887 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2888
2889 // Need to widen by more than 1 step, promote the FP type, then do a widening
2890 // convert.
2891 if (DstEltSize > (2 * SrcEltSize)) {
2892 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2893 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2894 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2895 }
2896
2897 unsigned RVVOpc =
2899 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2900
2901 SDValue SplatZero = DAG.getNode(
2902 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2903 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2904 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2905 Res, DAG.getUNDEF(DstContainerVT), VL);
2906
2907 if (DstVT.isFixedLengthVector())
2908 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2909
2910 return Res;
2911}
2912
2914 switch (Opc) {
2915 case ISD::FROUNDEVEN:
2917 case ISD::VP_FROUNDEVEN:
2918 return RISCVFPRndMode::RNE;
2919 case ISD::FTRUNC:
2920 case ISD::STRICT_FTRUNC:
2921 case ISD::VP_FROUNDTOZERO:
2922 return RISCVFPRndMode::RTZ;
2923 case ISD::FFLOOR:
2924 case ISD::STRICT_FFLOOR:
2925 case ISD::VP_FFLOOR:
2926 return RISCVFPRndMode::RDN;
2927 case ISD::FCEIL:
2928 case ISD::STRICT_FCEIL:
2929 case ISD::VP_FCEIL:
2930 return RISCVFPRndMode::RUP;
2931 case ISD::FROUND:
2932 case ISD::STRICT_FROUND:
2933 case ISD::VP_FROUND:
2934 return RISCVFPRndMode::RMM;
2935 case ISD::FRINT:
2936 return RISCVFPRndMode::DYN;
2937 }
2938
2940}
2941
2942// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2943// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2944// the integer domain and back, taking care to avoid converting values that are
2945// NaN or already correct.
2946static SDValue
2948 const RISCVSubtarget &Subtarget) {
2949 MVT VT = Op.getSimpleValueType();
2950 assert(VT.isVector() && "Unexpected type");
2951
2952 SDLoc DL(Op);
2953
2954 SDValue Src = Op.getOperand(0);
2955
2956 MVT ContainerVT = VT;
2957 if (VT.isFixedLengthVector()) {
2958 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2959 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2960 }
2961
2962 SDValue Mask, VL;
2963 if (Op->isVPOpcode()) {
2964 Mask = Op.getOperand(1);
2965 if (VT.isFixedLengthVector())
2966 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2967 Subtarget);
2968 VL = Op.getOperand(2);
2969 } else {
2970 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2971 }
2972
2973 // Freeze the source since we are increasing the number of uses.
2974 Src = DAG.getFreeze(Src);
2975
2976 // We do the conversion on the absolute value and fix the sign at the end.
2977 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2978
2979 // Determine the largest integer that can be represented exactly. This and
2980 // values larger than it don't have any fractional bits so don't need to
2981 // be converted.
2982 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2983 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2984 APFloat MaxVal = APFloat(FltSem);
2985 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2986 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2987 SDValue MaxValNode =
2988 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2989 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2990 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2991
2992 // If abs(Src) was larger than MaxVal or nan, keep it.
2993 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2994 Mask =
2995 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2996 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2997 Mask, Mask, VL});
2998
2999 // Truncate to integer and convert back to FP.
3000 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3001 MVT XLenVT = Subtarget.getXLenVT();
3002 SDValue Truncated;
3003
3004 switch (Op.getOpcode()) {
3005 default:
3006 llvm_unreachable("Unexpected opcode");
3007 case ISD::FCEIL:
3008 case ISD::VP_FCEIL:
3009 case ISD::FFLOOR:
3010 case ISD::VP_FFLOOR:
3011 case ISD::FROUND:
3012 case ISD::FROUNDEVEN:
3013 case ISD::VP_FROUND:
3014 case ISD::VP_FROUNDEVEN:
3015 case ISD::VP_FROUNDTOZERO: {
3018 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3019 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3020 break;
3021 }
3022 case ISD::FTRUNC:
3023 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3024 Mask, VL);
3025 break;
3026 case ISD::FRINT:
3027 case ISD::VP_FRINT:
3028 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3029 break;
3030 case ISD::FNEARBYINT:
3031 case ISD::VP_FNEARBYINT:
3032 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3033 Mask, VL);
3034 break;
3035 }
3036
3037 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3038 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3039 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3040 Mask, VL);
3041
3042 // Restore the original sign so that -0.0 is preserved.
3043 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3044 Src, Src, Mask, VL);
3045
3046 if (!VT.isFixedLengthVector())
3047 return Truncated;
3048
3049 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3050}
3051
3052// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3053// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
3054// qNaN and converting the new source to integer and back to FP.
3055static SDValue
3057 const RISCVSubtarget &Subtarget) {
3058 SDLoc DL(Op);
3059 MVT VT = Op.getSimpleValueType();
3060 SDValue Chain = Op.getOperand(0);
3061 SDValue Src = Op.getOperand(1);
3062
3063 MVT ContainerVT = VT;
3064 if (VT.isFixedLengthVector()) {
3065 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3066 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3067 }
3068
3069 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(Src);
3073
3074 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3075 MVT MaskVT = Mask.getSimpleValueType();
3077 DAG.getVTList(MaskVT, MVT::Other),
3078 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3079 DAG.getUNDEF(MaskVT), Mask, VL});
3080 Chain = Unorder.getValue(1);
3082 DAG.getVTList(ContainerVT, MVT::Other),
3083 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3084 Chain = Src.getValue(1);
3085
3086 // We do the conversion on the absolute value and fix the sign at the end.
3087 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3088
3089 // Determine the largest integer that can be represented exactly. This and
3090 // values larger than it don't have any fractional bits so don't need to
3091 // be converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode =
3098 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3099 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3100 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3101
3102 // If abs(Src) was larger than MaxVal or nan, keep it.
3103 Mask = DAG.getNode(
3104 RISCVISD::SETCC_VL, DL, MaskVT,
3105 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3106
3107 // Truncate to integer and convert back to FP.
3108 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3109 MVT XLenVT = Subtarget.getXLenVT();
3110 SDValue Truncated;
3111
3112 switch (Op.getOpcode()) {
3113 default:
3114 llvm_unreachable("Unexpected opcode");
3115 case ISD::STRICT_FCEIL:
3116 case ISD::STRICT_FFLOOR:
3117 case ISD::STRICT_FROUND:
3121 Truncated = DAG.getNode(
3122 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3123 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3124 break;
3125 }
3126 case ISD::STRICT_FTRUNC:
3127 Truncated =
3129 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3130 break;
3133 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3134 Mask, VL);
3135 break;
3136 }
3137 Chain = Truncated.getValue(1);
3138
3139 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3140 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3141 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3142 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3143 Truncated, Mask, VL);
3144 Chain = Truncated.getValue(1);
3145 }
3146
3147 // Restore the original sign so that -0.0 is preserved.
3148 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3149 Src, Src, Mask, VL);
3150
3151 if (VT.isFixedLengthVector())
3152 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3153 return DAG.getMergeValues({Truncated, Chain}, DL);
3154}
3155
3156static SDValue
3158 const RISCVSubtarget &Subtarget) {
3159 MVT VT = Op.getSimpleValueType();
3160 if (VT.isVector())
3161 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3162
3163 if (DAG.shouldOptForSize())
3164 return SDValue();
3165
3166 SDLoc DL(Op);
3167 SDValue Src = Op.getOperand(0);
3168
3169 // Create an integer the size of the mantissa with the MSB set. This and all
3170 // values larger than it don't have any fractional bits so don't need to be
3171 // converted.
3172 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3173 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3174 APFloat MaxVal = APFloat(FltSem);
3175 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3176 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3177 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3178
3180 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3181 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3182}
3183
3184// Expand vector LRINT and LLRINT by converting to the integer domain.
3186 const RISCVSubtarget &Subtarget) {
3187 MVT VT = Op.getSimpleValueType();
3188 assert(VT.isVector() && "Unexpected type");
3189
3190 SDLoc DL(Op);
3191 SDValue Src = Op.getOperand(0);
3192 MVT ContainerVT = VT;
3193
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3200 SDValue Truncated =
3201 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3202
3203 if (!VT.isFixedLengthVector())
3204 return Truncated;
3205
3206 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3207}
3208
3209static SDValue
3211 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3212 SDValue Offset, SDValue Mask, SDValue VL,
3214 if (Merge.isUndef())
3216 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3217 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3218 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3219}
3220
3221static SDValue
3222getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3224 SDValue VL,
3226 if (Merge.isUndef())
3228 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3229 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3230 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3231}
3232
3233static MVT getLMUL1VT(MVT VT) {
3235 "Unexpected vector MVT");
3239}
3240
3244 int64_t Addend;
3245};
3246
3247static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3249 // We will use a SINT_TO_FP to materialize this constant so we should use a
3250 // signed APSInt here.
3251 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3252 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3253 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3254 // the rounding mode changes the output value, then it is not an exact
3255 // integer.
3257 bool IsExact;
3258 // If it is out of signed integer range, it will return an invalid operation.
3259 // If it is not an exact integer, IsExact is false.
3260 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3262 !IsExact)
3263 return std::nullopt;
3264 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3265}
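// A few examples of the behavior above: 3.0 yields 3, -1.0 with an 8-bit
// width yields 0xFF (the two's complement bits, viewed zero-extended), and
// 2.5 yields std::nullopt because the conversion is inexact.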
3266
3267// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3268// to the (non-zero) step S and start value X. This can be then lowered as the
3269// RVV sequence (VID * S) + X, for example.
3270// The step S is represented as an integer numerator divided by a positive
3271// denominator. Note that the implementation currently only identifies
3272// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3273// cannot detect 2/3, for example.
3274// Note that this method will also match potentially unappealing index
3275// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3276// determine whether this is worth generating code for.
3277static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3278 unsigned EltSizeInBits) {
3279 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3280 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3281 return std::nullopt;
3282 bool IsInteger = Op.getValueType().isInteger();
3283
3284 std::optional<unsigned> SeqStepDenom;
3285 std::optional<int64_t> SeqStepNum, SeqAddend;
3286 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3287 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3288
3289 // First extract the ops into a list of constant integer values. This may not
3290 // be possible for floats if they're not all representable as integers.
3292 const unsigned OpSize = Op.getScalarValueSizeInBits();
3293 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3294 if (Elt.isUndef()) {
3295 Elts[Idx] = std::nullopt;
3296 continue;
3297 }
3298 if (IsInteger) {
3299 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3300 } else {
3301 auto ExactInteger =
3302 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3303 if (!ExactInteger)
3304 return std::nullopt;
3305 Elts[Idx] = *ExactInteger;
3306 }
3307 }
3308
3309 for (auto [Idx, Elt] : enumerate(Elts)) {
3310 // Assume undef elements match the sequence; we just have to be careful
3311 // when interpolating across them.
3312 if (!Elt)
3313 continue;
3314
3315 if (PrevElt) {
3316 // Calculate the step since the last non-undef element, and ensure
3317 // it's consistent across the entire sequence.
3318 unsigned IdxDiff = Idx - PrevElt->second;
3319 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3320
3321 // A zero value difference means that we're somewhere in the middle
3322 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3323 // step change before evaluating the sequence.
3324 if (ValDiff == 0)
3325 continue;
3326
3327 int64_t Remainder = ValDiff % IdxDiff;
3328 // Normalize the step if it's greater than 1.
3329 if (Remainder != ValDiff) {
3330 // The difference must cleanly divide the element span.
3331 if (Remainder != 0)
3332 return std::nullopt;
3333 ValDiff /= IdxDiff;
3334 IdxDiff = 1;
3335 }
3336
3337 if (!SeqStepNum)
3338 SeqStepNum = ValDiff;
3339 else if (ValDiff != SeqStepNum)
3340 return std::nullopt;
3341
3342 if (!SeqStepDenom)
3343 SeqStepDenom = IdxDiff;
3344 else if (IdxDiff != *SeqStepDenom)
3345 return std::nullopt;
3346 }
3347
3348 // Record this non-undef element for later.
3349 if (!PrevElt || PrevElt->first != *Elt)
3350 PrevElt = std::make_pair(*Elt, Idx);
3351 }
3352
3353 // We need to have logged a step for this to count as a legal index sequence.
3354 if (!SeqStepNum || !SeqStepDenom)
3355 return std::nullopt;
3356
3357 // Loop back through the sequence and validate elements we might have skipped
3358 // while waiting for a valid step. While doing this, log any sequence addend.
3359 for (auto [Idx, Elt] : enumerate(Elts)) {
3360 if (!Elt)
3361 continue;
3362 uint64_t ExpectedVal =
3363 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3364 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3365 if (!SeqAddend)
3366 SeqAddend = Addend;
3367 else if (Addend != SeqAddend)
3368 return std::nullopt;
3369 }
3370
3371 assert(SeqAddend && "Must have an addend if we have a step");
3372
3373 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3374}
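// For example, the constant vector <1, 3, 5, 7> is recognized as
// {StepNumerator 2, StepDenominator 1, Addend 1} and <0, 0, 1, 1> as
// {StepNumerator 1, StepDenominator 2, Addend 0}, while a vector such as
// <0, 1, 3, 4> has no consistent step and yields std::nullopt.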
3375
3376// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3377// and lower it as a VRGATHER_VX_VL from the source vector.
3378static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget) {
3381 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3382 return SDValue();
3383 SDValue Vec = SplatVal.getOperand(0);
3384 // Only perform this optimization on vectors of the same size for simplicity.
3385 // Don't perform this optimization for i1 vectors.
3386 // FIXME: Support i1 vectors, maybe by promoting to i8?
3387 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3388 return SDValue();
3389 SDValue Idx = SplatVal.getOperand(1);
3390 // The index must be a legal type.
3391 if (Idx.getValueType() != Subtarget.getXLenVT())
3392 return SDValue();
3393
3394 MVT ContainerVT = VT;
3395 if (VT.isFixedLengthVector()) {
3396 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3398 }
3399
3400 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3401
3402 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3403 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3404
3405 if (!VT.isFixedLengthVector())
3406 return Gather;
3407
3408 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3409}
3410
3411
3412/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3413/// which constitute a large proportion of the elements. In such cases we can
3414/// splat a vector with the dominant element and make up the shortfall with
3415/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416/// Note that this includes vectors of 2 elements by association. The
3417/// upper-most element is the "dominant" one, allowing us to use a splat to
3418/// "insert" the upper element, and an insert of the lower element at position
3419/// 0, which improves codegen.
3421 const RISCVSubtarget &Subtarget) {
3422 MVT VT = Op.getSimpleValueType();
3423 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3424
3425 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3426
3427 SDLoc DL(Op);
3428 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3429
3430 MVT XLenVT = Subtarget.getXLenVT();
3431 unsigned NumElts = Op.getNumOperands();
3432
3433 SDValue DominantValue;
3434 unsigned MostCommonCount = 0;
3435 DenseMap<SDValue, unsigned> ValueCounts;
3436 unsigned NumUndefElts =
3437 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3438
3439 // Track the number of scalar loads we know we'd be inserting, estimated as
3440 // any non-zero floating-point constant. Other kinds of elements are either
3441 // already in registers or are materialized on demand. The threshold at which
3442 // a vector load is more desirable than several scalar materialization and
3443 // vector-insertion instructions is not known.
3444 unsigned NumScalarLoads = 0;
3445
3446 for (SDValue V : Op->op_values()) {
3447 if (V.isUndef())
3448 continue;
3449
3450 ValueCounts.insert(std::make_pair(V, 0));
3451 unsigned &Count = ValueCounts[V];
3452 if (0 == Count)
3453 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3454 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3455
3456 // Is this value dominant? In case of a tie, prefer the highest element as
3457 // it's cheaper to insert near the beginning of a vector than it is at the
3458 // end.
3459 if (++Count >= MostCommonCount) {
3460 DominantValue = V;
3461 MostCommonCount = Count;
3462 }
3463 }
3464
3465 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3466 unsigned NumDefElts = NumElts - NumUndefElts;
3467 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3468
3469 // Don't perform this optimization when optimizing for size, since
3470 // materializing elements and inserting them tends to cause code bloat.
3471 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3472 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3473 ((MostCommonCount > DominantValueCountThreshold) ||
3474 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3475 // Start by splatting the most common element.
3476 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3477
3478 DenseSet<SDValue> Processed{DominantValue};
3479
3480 // We can handle an insert into the last element (of a splat) via
3481 // v(f)slide1down. This is slightly better than the vslideup insert
3482 // lowering as it avoids the need for a vector group temporary. It
3483 // is also better than using vmerge.vx as it avoids the need to
3484 // materialize the mask in a vector register.
3485 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3486 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3487 LastOp != DominantValue) {
3488 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3489 auto OpCode =
3491 if (!VT.isFloatingPoint())
3492 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3493 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3494 LastOp, Mask, VL);
3495 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3496 Processed.insert(LastOp);
3497 }
3498
3499 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3500 for (const auto &OpIdx : enumerate(Op->ops())) {
3501 const SDValue &V = OpIdx.value();
3502 if (V.isUndef() || !Processed.insert(V).second)
3503 continue;
3504 if (ValueCounts[V] == 1) {
3505 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3506 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3507 } else {
3508 // Blend in all instances of this value using a VSELECT, using a
3509 // mask where each bit signals whether that element is the one
3510 // we're after.
3512 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3513 return DAG.getConstant(V == V1, DL, XLenVT);
3514 });
3515 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3516 DAG.getBuildVector(SelMaskTy, DL, Ops),
3517 DAG.getSplatBuildVector(VT, DL, V), Vec);
3518 }
3519 }
3520
3521 return Vec;
3522 }
3523
3524 return SDValue();
3525}
3526
3528 const RISCVSubtarget &Subtarget) {
3529 MVT VT = Op.getSimpleValueType();
3530 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3531
3532 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3533
3534 SDLoc DL(Op);
3535 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3536
3537 MVT XLenVT = Subtarget.getXLenVT();
3538 unsigned NumElts = Op.getNumOperands();
3539
3540 if (VT.getVectorElementType() == MVT::i1) {
3541 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3542 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3543 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3544 }
3545
3546 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3547 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3548 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3549 }
3550
3551 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3552 // scalar integer chunks whose bit-width depends on the number of mask
3553 // bits and XLEN.
3554 // First, determine the most appropriate scalar integer type to use. This
3555 // is at most XLenVT, but may be shrunk to a smaller vector element type
3556 // according to the size of the final vector - use i8 chunks rather than
3557 // XLenVT if we're producing a v8i1. This results in more consistent
3558 // codegen across RV32 and RV64.
3559 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3560 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3561 // If we have to use more than one INSERT_VECTOR_ELT then this
3562 // optimization is likely to increase code size; avoid performing it in
3563 // such a case. We can use a load from a constant pool in this case.
3564 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3565 return SDValue();
3566 // Now we can create our integer vector type. Note that it may be larger
3567 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3568 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3569 MVT IntegerViaVecVT =
3570 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3571 IntegerViaVecElts);
3572
3573 uint64_t Bits = 0;
3574 unsigned BitPos = 0, IntegerEltIdx = 0;
3575 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3576
3577 for (unsigned I = 0; I < NumElts;) {
3578 SDValue V = Op.getOperand(I);
3579 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3580 Bits |= ((uint64_t)BitValue << BitPos);
3581 ++BitPos;
3582 ++I;
3583
3584 // Once we accumulate enough bits to fill our scalar type or process the
3585 // last element, insert into our vector and clear our accumulated data.
3586 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3587 if (NumViaIntegerBits <= 32)
3588 Bits = SignExtend64<32>(Bits);
3589 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3590 Elts[IntegerEltIdx] = Elt;
3591 Bits = 0;
3592 BitPos = 0;
3593 IntegerEltIdx++;
3594 }
3595 }
3596
3597 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3598
3599 if (NumElts < NumViaIntegerBits) {
3600 // If we're producing a smaller vector than our minimum legal integer
3601 // type, bitcast to the equivalent (known-legal) mask type, and extract
3602 // our final mask.
3603 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3604 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3605 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3606 DAG.getConstant(0, DL, XLenVT));
3607 } else {
3608 // Else we must have produced an integer type with the same size as the
3609 // mask type; bitcast for the final result.
3610 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3611 Vec = DAG.getBitcast(VT, Vec);
3612 }
3613
3614 return Vec;
3615 }
3616
3617 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3618 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3620 if (!VT.isFloatingPoint())
3621 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3622 Splat =
3623 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3624 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3625 }
3626
3627 // Try and match index sequences, which we can lower to the vid instruction
3628 // with optional modifications. An all-undef vector is matched by
3629 // getSplatValue, above.
3630 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3631 int64_t StepNumerator = SimpleVID->StepNumerator;
3632 unsigned StepDenominator = SimpleVID->StepDenominator;
3633 int64_t Addend = SimpleVID->Addend;
3634
3635 assert(StepNumerator != 0 && "Invalid step");
3636 bool Negate = false;
3637 int64_t SplatStepVal = StepNumerator;
3638 unsigned StepOpcode = ISD::MUL;
3639 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3640 // anyway as the shift of 63 won't fit in uimm5.
3641 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3642 isPowerOf2_64(std::abs(StepNumerator))) {
3643 Negate = StepNumerator < 0;
3644 StepOpcode = ISD::SHL;
3645 SplatStepVal = Log2_64(std::abs(StepNumerator));
3646 }
3647
3648 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3649 // threshold since it's the immediate value many RVV instructions accept.
3650 // There is no vmul.vi instruction so ensure the multiply constant can fit
3651 // in a single addi instruction.
3652 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3653 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3654 isPowerOf2_32(StepDenominator) &&
3655 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3656 MVT VIDVT =
3658 MVT VIDContainerVT =
3659 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3660 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3661 // Convert right out of the scalable type so we can use standard ISD
3662 // nodes for the rest of the computation. If we used scalable types with
3663 // these, we'd lose the fixed-length vector info and generate worse
3664 // vsetvli code.
3665 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3666 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3667 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3668 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3669 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3670 }
3671 if (StepDenominator != 1) {
3672 SDValue SplatStep =
3673 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3674 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3675 }
3676 if (Addend != 0 || Negate) {
3677 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3678 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3679 VID);
3680 }
3681 if (VT.isFloatingPoint()) {
3682 // TODO: Use vfwcvt to reduce register pressure.
3683 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3684 }
3685 return VID;
3686 }
3687 }
3688
3689 // For very small build_vectors, use a single scalar insert of a constant.
3690 // TODO: Base this on constant rematerialization cost, not size.
3691 const unsigned EltBitSize = VT.getScalarSizeInBits();
3692 if (VT.getSizeInBits() <= 32 &&
3694 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3695 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3696 "Unexpected sequence type");
3697 // If we can use the original VL with the modified element type, this
3698 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3699 // be moved into InsertVSETVLI?
3700 unsigned ViaVecLen =
3701 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3702 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3703
3704 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3705 uint64_t SplatValue = 0;
3706 // Construct the amalgamated value at this larger vector type.
3707 for (const auto &OpIdx : enumerate(Op->op_values())) {
3708 const auto &SeqV = OpIdx.value();
3709 if (!SeqV.isUndef())
3710 SplatValue |=
3711 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3712 }
3713
3714 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3715 // achieve better constant materialization.
3716 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3717 SplatValue = SignExtend64<32>(SplatValue);
3718
3719 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3720 DAG.getUNDEF(ViaVecVT),
3721 DAG.getConstant(SplatValue, DL, XLenVT),
3722 DAG.getVectorIdxConstant(0, DL));
3723 if (ViaVecLen != 1)
3725 MVT::getVectorVT(ViaIntVT, 1), Vec,
3726 DAG.getConstant(0, DL, XLenVT));
3727 return DAG.getBitcast(VT, Vec);
3728 }
3729
3730
3731 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3732 // when re-interpreted as a vector with a larger element type. For example,
3733 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3734 // could be instead splat as
3735 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3736 // TODO: This optimization could also work on non-constant splats, but it
3737 // would require bit-manipulation instructions to construct the splat value.
3738 SmallVector<SDValue> Sequence;
3739 const auto *BV = cast<BuildVectorSDNode>(Op);
3740 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3742 BV->getRepeatedSequence(Sequence) &&
3743 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3744 unsigned SeqLen = Sequence.size();
3745 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3746 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3747 ViaIntVT == MVT::i64) &&
3748 "Unexpected sequence type");
3749
3750 // If we can use the original VL with the modified element type, this
3751 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3752 // be moved into InsertVSETVLI?
3753 const unsigned RequiredVL = NumElts / SeqLen;
3754 const unsigned ViaVecLen =
3755 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3756 NumElts : RequiredVL;
3757 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3758
3759 unsigned EltIdx = 0;
3760 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3761 uint64_t SplatValue = 0;
3762 // Construct the amalgamated value which can be splatted as this larger
3763 // vector type.
3764 for (const auto &SeqV : Sequence) {
3765 if (!SeqV.isUndef())
3766 SplatValue |=
3767 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3768 EltIdx++;
3769 }
3770
3771 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3772 // achieve better constant materialization.
3773 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3774 SplatValue = SignExtend64<32>(SplatValue);
3775
3776 // Since we can't introduce illegal i64 types at this stage, we can only
3777 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3778 // way we can use RVV instructions to splat.
3779 assert((ViaIntVT.bitsLE(XLenVT) ||
3780 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3781 "Unexpected bitcast sequence");
3782 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3783 SDValue ViaVL =
3784 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3785 MVT ViaContainerVT =
3786 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3787 SDValue Splat =
3788 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3789 DAG.getUNDEF(ViaContainerVT),
3790 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3791 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3792 if (ViaVecLen != RequiredVL)
3793 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3794 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3795 DAG.getConstant(0, DL, XLenVT));
3796 return DAG.getBitcast(VT, Splat);
3797 }
3798 }
3799
3800 // If the number of signbits allows, see if we can lower as a <N x i8>.
3801 // Our main goal here is to reduce LMUL (and thus work) required to
3802 // build the constant, but we will also narrow if the resulting
3803 // narrow vector is known to materialize cheaply.
3804 // TODO: We really should be costing the smaller vector. There are
3805 // profitable cases this misses.
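// Illustrative note: for v4i16 = build_vector 3, -2, 1, 0 every element is
// sign-representable in 8 bits (ComputeNumSignBits >= 9, so
// EltBitSize - SignBits < 8), and the constant is built as a v4i8
// build_vector and widened back with VSEXT_VL, halving the LMUL needed.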
3806 if (EltBitSize > 8 && VT.isInteger() &&
3807 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3808 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3809 if (EltBitSize - SignBits < 8) {
3810 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3811 DL, Op->ops());
3812 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3813 Source, DAG, Subtarget);
3814 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3815 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3816 }
3817 }
3818
3819 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3820 return Res;
3821
3822 // For constant vectors, use generic constant pool lowering. Otherwise,
3823 // we'd have to materialize constants in GPRs just to move them into the
3824 // vector.
3825 return SDValue();
3826}
3827
3828static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3829 const RISCVSubtarget &Subtarget) {
3830 MVT VT = Op.getSimpleValueType();
3831 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3832
3833 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3834 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3835 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3836
3837 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3838
3839 SDLoc DL(Op);
3840 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3841
3842 MVT XLenVT = Subtarget.getXLenVT();
3843
3844 if (VT.getVectorElementType() == MVT::i1) {
3845 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3846 // vector type, we have a legal equivalently-sized i8 type, so we can use
3847 // that.
3848 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3849 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3850
3851 SDValue WideVec;
3852 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3853 // For a splat, perform a scalar truncate before creating the wider
3854 // vector.
3855 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3856 DAG.getConstant(1, DL, Splat.getValueType()));
3857 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3858 } else {
3859 SmallVector<SDValue, 8> Ops(Op->op_values());
3860 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3861 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3862 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3863 }
3864
3865 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3866 }
3867
3868 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3869 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3870 return Gather;
3871 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3872 : RISCVISD::VMV_V_X_VL;
3873 if (!VT.isFloatingPoint())
3874 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3875 Splat =
3876 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3877 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3878 }
3879
3880 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3881 return Res;
3882
3883 // If we're compiling for an exact VLEN value, we can split our work per
3884 // register in the register group.
3885 if (const auto VLen = Subtarget.getRealVLen();
3886 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3887 MVT ElemVT = VT.getVectorElementType();
3888 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3889 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3890 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3891 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3892 assert(M1VT == getLMUL1VT(M1VT));
3893
3894 // The following semantically builds up a fixed length concat_vector
3895 // of the component build_vectors. We eagerly lower to scalable and
3896 // insert_subvector here to avoid DAG combining it back to a large
3897 // build_vector.
3898 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3899 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3900 SDValue Vec = DAG.getUNDEF(ContainerVT);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3902 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3903 SDValue SubBV =
3904 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3905 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3906 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3907 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3908 DAG.getVectorIdxConstant(InsertIdx, DL));
3909 }
3910 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3911 }
3912
3913 // For m1 vectors, if we have non-undef values in both halves of our vector,
3914 // split the vector into low and high halves, build them separately, then
3915 // use a vselect to combine them. For long vectors, this cuts the critical
3916 // path of the vslide1down sequence in half, and gives us an opportunity
3917 // to special case each half independently. Note that we don't change the
3918 // length of the sub-vectors here, so if both fall back to the generic
3919 // vslide1down path, we should be able to fold the vselect into the final
3920 // vslidedown (for the undef tail) for the first half w/ masking.
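// Illustrative note: for v8i32 = build_vector a, b, c, d, e, f, g, h the two
// sub-build_vectors are <a, b, c, d, undef, undef, undef, undef> and
// <undef, undef, undef, undef, e, f, g, h>, and the select mask is
// <1, 1, 1, 1, 0, 0, 0, 0>, taking the low half from the first and the high
// half from the second.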
3921 unsigned NumElts = VT.getVectorNumElements();
3922 unsigned NumUndefElts =
3923 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3924 unsigned NumDefElts = NumElts - NumUndefElts;
3925 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3926 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3927 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3928 SmallVector<SDValue> MaskVals;
3929 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3930 SubVecAOps.reserve(NumElts);
3931 SubVecBOps.reserve(NumElts);
3932 for (unsigned i = 0; i < NumElts; i++) {
3933 SDValue Elem = Op->getOperand(i);
3934 if (i < NumElts / 2) {
3935 SubVecAOps.push_back(Elem);
3936 SubVecBOps.push_back(UndefElem);
3937 } else {
3938 SubVecAOps.push_back(UndefElem);
3939 SubVecBOps.push_back(Elem);
3940 }
3941 bool SelectMaskVal = (i < NumElts / 2);
3942 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3943 }
3944 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3945 MaskVals.size() == NumElts);
3946
3947 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3948 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3949 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3950 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3951 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3952 }
3953
3954 // Cap the cost at a value linear to the number of elements in the vector.
3955 // The default lowering is to use the stack. The vector store + scalar loads
3956 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3957 // being (at least) linear in LMUL. As a result, using the vslidedown
3958 // lowering for every element ends up being VL*LMUL.
3959 // TODO: Should we be directly costing the stack alternative? Doing so might
3960 // give us a more accurate upper bound.
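// Illustrative note: a 16-element build_vector gets LinearBudget = 32; if
// the container is LMUL_4, each defined element (and each run of undefs)
// costs 4, so only about 8 slide steps fit in the budget before we give up
// and fall back to the default stack-based lowering.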
3961 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3962
3963 // TODO: unify with TTI getSlideCost.
3964 InstructionCost PerSlideCost = 1;
3965 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3966 default: break;
3967 case RISCVII::VLMUL::LMUL_2:
3968 PerSlideCost = 2;
3969 break;
3970 case RISCVII::VLMUL::LMUL_4:
3971 PerSlideCost = 4;
3972 break;
3973 case RISCVII::VLMUL::LMUL_8:
3974 PerSlideCost = 8;
3975 break;
3976 }
3977
3978 // TODO: Should we be using the build instseq then cost + evaluate scheme
3979 // we use for integer constants here?
3980 unsigned UndefCount = 0;
3981 for (const SDValue &V : Op->ops()) {
3982 if (V.isUndef()) {
3983 UndefCount++;
3984 continue;
3985 }
3986 if (UndefCount) {
3987 LinearBudget -= PerSlideCost;
3988 UndefCount = 0;
3989 }
3990 LinearBudget -= PerSlideCost;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 }
3995
3996 if (LinearBudget < 0)
3997 return SDValue();
3998
3999 assert((!VT.isFloatingPoint() ||
4000 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4001 "Illegal type which will result in reserved encoding");
4002
4003 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4004
4005 SDValue Vec;
4006 UndefCount = 0;
4007 for (SDValue V : Op->ops()) {
4008 if (V.isUndef()) {
4009 UndefCount++;
4010 continue;
4011 }
4012
4013 // Start our sequence with a TA splat in the hopes that hardware is able to
4014 // recognize there's no dependency on the prior value of our temporary
4015 // register.
4016 if (!Vec) {
4017 Vec = DAG.getSplatVector(VT, DL, V);
4018 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4019 UndefCount = 0;
4020 continue;
4021 }
4022
4023 if (UndefCount) {
4024 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4025 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4026 Vec, Offset, Mask, VL, Policy);
4027 UndefCount = 0;
4028 }
4029 auto OpCode =
4030 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4031 if (!VT.isFloatingPoint())
4032 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4033 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4034 V, Mask, VL);
4035 }
4036 if (UndefCount) {
4037 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4038 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4039 Vec, Offset, Mask, VL, Policy);
4040 }
4041 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4042}
4043
4044static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4045 SDValue Lo, SDValue Hi, SDValue VL,
4046 SelectionDAG &DAG) {
4047 if (!Passthru)
4048 Passthru = DAG.getUNDEF(VT);
4049 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4050 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4051 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4052 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4053 // node in order to try and match RVV vector/scalar instructions.
4054 if ((LoC >> 31) == HiC)
4055 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4056
4057 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4058 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4059 // vlmax vsetvli or vsetivli to change the VL.
4060 // FIXME: Support larger constants?
4061 // FIXME: Support non-constant VLs by saturating?
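// Illustrative note: splatting the i64 value 0x0000000500000005 (LoC == HiC
// == 5) with a constant VL of 4 doubles the VL to 8, emits a VMV_V_X_VL of
// 5 into an i32 vector with twice the element count, and bitcasts back; the
// doubled VL still fits the vsetivli immediate.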
4062 if (LoC == HiC) {
4063 SDValue NewVL;
4064 if (isAllOnesConstant(VL) ||
4065 (isa<RegisterSDNode>(VL) &&
4066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4067 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4068 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4069 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4070
4071 if (NewVL) {
4072 MVT InterVT =
4073 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4074 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4075 DAG.getUNDEF(InterVT), Lo, NewVL);
4076 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4077 }
4078 }
4079 }
4080
4081 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4082 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4083 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4084 Hi.getConstantOperandVal(1) == 31)
4085 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4086
4087 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4088 // even if it might be sign extended.
4089 if (Hi.isUndef())
4090 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4091
4092 // Fall back to a stack store and stride x0 vector load.
4093 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4094 Hi, VL);
4095}
4096
4097// Called by type legalization to handle splat of i64 on RV32.
4098// FIXME: We can optimize this when the type has sign or zero bits in one
4099// of the halves.
4100static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4101 SDValue Scalar, SDValue VL,
4102 SelectionDAG &DAG) {
4103 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4104 SDValue Lo, Hi;
4105 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4106 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4107}
4108
4109// This function lowers a splat of a scalar operand Splat with the vector
4110// length VL. It ensures the final sequence is type legal, which is useful when
4111// lowering a splat after type legalization.
4112static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4113 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4114 const RISCVSubtarget &Subtarget) {
4115 bool HasPassthru = Passthru && !Passthru.isUndef();
4116 if (!HasPassthru && !Passthru)
4117 Passthru = DAG.getUNDEF(VT);
4118 if (VT.isFloatingPoint())
4119 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4120
4121 MVT XLenVT = Subtarget.getXLenVT();
4122
4123 // Simplest case is that the operand needs to be promoted to XLenVT.
4124 if (Scalar.getValueType().bitsLE(XLenVT)) {
4125 // If the operand is a constant, sign extend to increase our chances
4126 // of being able to use a .vi instruction. ANY_EXTEND would become a
4127 // a zero extend and the simm5 check in isel would fail.
4128 // FIXME: Should we ignore the upper bits in isel instead?
4129 unsigned ExtOpc =
4130 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4131 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4132 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4133 }
4134
4135 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4136 "Unexpected scalar for splat lowering!");
4137
4138 if (isOneConstant(VL) && isNullConstant(Scalar))
4139 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4140 DAG.getConstant(0, DL, XLenVT), VL);
4141
4142 // Otherwise use the more complicated splatting algorithm.
4143 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4144}
4145
4146// This function lowers an insert of a scalar operand Scalar into lane
4147// 0 of the vector regardless of the value of VL. The contents of the
4148// remaining lanes of the result vector are unspecified. VL is assumed
4149// to be non-zero.
4150static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4151 const SDLoc &DL, SelectionDAG &DAG,
4152 const RISCVSubtarget &Subtarget) {
4153 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4154
4155 const MVT XLenVT = Subtarget.getXLenVT();
4156 SDValue Passthru = DAG.getUNDEF(VT);
4157
4158 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4159 isNullConstant(Scalar.getOperand(1))) {
4160 SDValue ExtractedVal = Scalar.getOperand(0);
4161 // The element types must be the same.
4162 if (ExtractedVal.getValueType().getVectorElementType() ==
4163 VT.getVectorElementType()) {
4164 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4165 MVT ExtractedContainerVT = ExtractedVT;
4166 if (ExtractedContainerVT.isFixedLengthVector()) {
4167 ExtractedContainerVT = getContainerForFixedLengthVector(
4168 DAG, ExtractedContainerVT, Subtarget);
4169 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4170 ExtractedVal, DAG, Subtarget);
4171 }
4172 if (ExtractedContainerVT.bitsLE(VT))
4173 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4174 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4175 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4176 DAG.getVectorIdxConstant(0, DL));
4177 }
4178 }
4179
4180
4181 if (VT.isFloatingPoint())
4182 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4183 DAG.getUNDEF(VT), Scalar, VL);
4184
4185 // Avoid the tricky legalization cases by falling back to using the
4186 // splat code which already handles it gracefully.
4187 if (!Scalar.getValueType().bitsLE(XLenVT))
4188 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4189 DAG.getConstant(1, DL, XLenVT),
4190 VT, DL, DAG, Subtarget);
4191
4192 // If the operand is a constant, sign extend to increase our chances
4193 // of being able to use a .vi instruction. ANY_EXTEND would become a
4194 // zero extend and the simm5 check in isel would fail.
4195 // FIXME: Should we ignore the upper bits in isel instead?
4196 unsigned ExtOpc =
4197 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4198 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4199 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4200 DAG.getUNDEF(VT), Scalar, VL);
4201}
4202
4203// Is this a shuffle that extracts either the even or odd elements of a vector?
4204// That is, specifically, either (a) or (b) below.
4205// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4206// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4207// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4208// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4209// Returns {Src Vector, Even Elements} on success
4210static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4211 SDValue V2, ArrayRef<int> Mask,
4212 const RISCVSubtarget &Subtarget) {
4213 // Need to be able to widen the vector.
4214 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4215 return false;
4216
4217 // Both inputs must be extracts.
4218 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4219 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4220 return false;
4221
4222 // Extracting from the same source.
4223 SDValue Src = V1.getOperand(0);
4224 if (Src != V2.getOperand(0))
4225 return false;
4226
4227 // Src needs to have twice the number of elements.
4228 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4229 return false;
4230
4231 // The extracts must extract the two halves of the source.
4232 if (V1.getConstantOperandVal(1) != 0 ||
4233 V2.getConstantOperandVal(1) != Mask.size())
4234 return false;
4235
4236 // First index must be the first even or odd element from V1.
4237 if (Mask[0] != 0 && Mask[0] != 1)
4238 return false;
4239
4240 // The others must increase by 2 each time.
4241 // TODO: Support undef elements?
4242 for (unsigned i = 1; i != Mask.size(); ++i)
4243 if (Mask[i] != Mask[i - 1] + 2)
4244 return false;
4245
4246 return true;
4247}
4248
4249/// Is this shuffle interleaving contiguous elements from one vector into the
4250/// even elements and contiguous elements from another vector into the odd
4251/// elements. \p EvenSrc will contain the element that should be in the first
4252/// even element. \p OddSrc will contain the element that should be in the first
4253/// odd element. These can be the first element in a source or the element half
4254/// way through the source.
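// Illustrative note: for v8i16 and Mask = <0, 8, 1, 9, 2, 10, 3, 11> the
// result interleaves the low halves of both inputs, so EvenSrc = 0 and
// OddSrc = 8; both are multiples of HalfNumElts (4), so the mask is
// accepted. The unary interleave <0, 4, 1, 5, 2, 6, 3, 7> gives EvenSrc = 0
// and OddSrc = 4 and is accepted as well.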
4255static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4256 int &OddSrc, const RISCVSubtarget &Subtarget) {
4257 // We need to be able to widen elements to the next larger integer type.
4258 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4259 return false;
4260
4261 int Size = Mask.size();
4262 int NumElts = VT.getVectorNumElements();
4263 assert(Size == (int)NumElts && "Unexpected mask size");
4264
4265 SmallVector<unsigned, 2> StartIndexes;
4266 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4267 return false;
4268
4269 EvenSrc = StartIndexes[0];
4270 OddSrc = StartIndexes[1];
4271
4272 // One source should be low half of first vector.
4273 if (EvenSrc != 0 && OddSrc != 0)
4274 return false;
4275
4276 // Subvectors will be subtracted from either at the start of the two input
4277 // vectors, or at the start and middle of the first vector if it's a unary
4278 // interleave.
4279 // In both cases, HalfNumElts will be extracted.
4280 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4281 // we'll create an illegal extract_subvector.
4282 // FIXME: We could support other values using a slidedown first.
4283 int HalfNumElts = NumElts / 2;
4284 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4285}
4286
4287/// Match shuffles that concatenate two vectors, rotate the concatenation,
4288/// and then extract the original number of elements from the rotated result.
4289/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4290/// returned rotation amount is for a rotate right, where elements move from
4291/// higher elements to lower elements. \p LoSrc indicates the first source
4292/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4293/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4294/// 0 or 1 if a rotation is found.
4295///
4296/// NOTE: We talk about rotate to the right which matches how bit shift and
4297/// rotate instructions are described where LSBs are on the right, but LLVM IR
4298/// and the table below write vectors with the lowest elements on the left.
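// Illustrative trace: for Mask = [11, 12, 13, 14, 15, 0, 1, 2] with Size = 8,
// index 0 gives StartIdx = 0 - (11 % 8) = -3, so Rotation = 3 and HiSrc = 1;
// index 5 gives StartIdx = 5 - 0 = 5, confirming Rotation = 8 - 5 = 3 and
// LoSrc = 0. The caller then slides V2 down by 3 and slides V1 up by
// NumElts - 3 = 5 to materialize the rotate.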
4299static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4300 int Size = Mask.size();
4301
4302 // We need to detect various ways of spelling a rotation:
4303 // [11, 12, 13, 14, 15, 0, 1, 2]
4304 // [-1, 12, 13, 14, -1, -1, 1, -1]
4305 // [-1, -1, -1, -1, -1, -1, 1, 2]
4306 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4307 // [-1, 4, 5, 6, -1, -1, 9, -1]
4308 // [-1, 4, 5, 6, -1, -1, -1, -1]
4309 int Rotation = 0;
4310 LoSrc = -1;
4311 HiSrc = -1;
4312 for (int i = 0; i != Size; ++i) {
4313 int M = Mask[i];
4314 if (M < 0)
4315 continue;
4316
4317 // Determine where a rotate vector would have started.
4318 int StartIdx = i - (M % Size);
4319 // The identity rotation isn't interesting, stop.
4320 if (StartIdx == 0)
4321 return -1;
4322
4323 // If we found the tail of a vector the rotation must be the missing
4324 // front. If we found the head of a vector, it must be how much of the
4325 // head.
4326 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4327
4328 if (Rotation == 0)
4329 Rotation = CandidateRotation;
4330 else if (Rotation != CandidateRotation)
4331 // The rotations don't match, so we can't match this mask.
4332 return -1;
4333
4334 // Compute which value this mask is pointing at.
4335 int MaskSrc = M < Size ? 0 : 1;
4336
4337 // Compute which of the two target values this index should be assigned to.
4338 // This reflects whether the high elements are remaining or the low elements
4339 // are remaining.
4340 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4341
4342 // Either set up this value if we've not encountered it before, or check
4343 // that it remains consistent.
4344 if (TargetSrc < 0)
4345 TargetSrc = MaskSrc;
4346 else if (TargetSrc != MaskSrc)
4347 // This may be a rotation, but it pulls from the inputs in some
4348 // unsupported interleaving.
4349 return -1;
4350 }
4351
4352 // Check that we successfully analyzed the mask, and normalize the results.
4353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4354 assert((LoSrc >= 0 || HiSrc >= 0) &&
4355 "Failed to find a rotated input vector!");
4356
4357 return Rotation;
4358}
4359
4360// Lower a deinterleave shuffle to vnsrl.
4361// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4362// -> [p, q, r, s] (EvenElts == false)
4363// VT is the type of the vector to return, <[vscale x ]n x ty>
4364// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
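// Illustrative note: for Src = v8i8 <a, p, b, q, c, r, d, s> the bitcast
// below yields a v4i16 whose lanes hold the byte pairs (p:a), (q:b), (r:c),
// (s:d); a vnsrl by 0 keeps the low bytes, <a, b, c, d>, and a vnsrl by 8
// keeps the high bytes, <p, q, r, s>.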
4365static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4366 bool EvenElts,
4367 const RISCVSubtarget &Subtarget,
4368 SelectionDAG &DAG) {
4369 // The result is a vector of type <m x n x ty>
4370 MVT ContainerVT = VT;
4371 // Convert fixed vectors to scalable if needed
4372 if (ContainerVT.isFixedLengthVector()) {
4373 assert(Src.getSimpleValueType().isFixedLengthVector());
4374 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4375
4376 // The source is a vector of type <m x n*2 x ty>
4377 MVT SrcContainerVT =
4378 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4379 ContainerVT.getVectorElementCount() * 2);
4380 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4381 }
4382
4383 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4384
4385 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4386 // This also converts FP to int.
4387 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4388 MVT WideSrcContainerVT = MVT::getVectorVT(
4389 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4390 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4391
4392 // The integer version of the container type.
4393 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4394
4395 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4396 // the original element size.
4397 unsigned Shift = EvenElts ? 0 : EltBits;
4398 SDValue SplatShift = DAG.getNode(
4399 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4400 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4401 SDValue Res =
4402 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4403 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4404 // Cast back to FP if needed.
4405 Res = DAG.getBitcast(ContainerVT, Res);
4406
4407 if (VT.isFixedLengthVector())
4408 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4409 return Res;
4410}
4411
4412// Lower the following shuffle to vslidedown.
4413// a)
4414// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4415// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4416// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4417// b)
4418// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4419// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4420// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4421// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4422// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4423// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4424static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4425 SDValue V1, SDValue V2,
4426 ArrayRef<int> Mask,
4427 const RISCVSubtarget &Subtarget,
4428 SelectionDAG &DAG) {
4429 auto findNonEXTRACT_SUBVECTORParent =
4430 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4431 uint64_t Offset = 0;
4432 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4433 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4434 // a scalable vector. But we don't want to match the case.
4435 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4436 Offset += Parent.getConstantOperandVal(1);
4437 Parent = Parent.getOperand(0);
4438 }
4439 return std::make_pair(Parent, Offset);
4440 };
4441
4442 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4443 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4444
4445 // Extracting from the same source.
4446 SDValue Src = V1Src;
4447 if (Src != V2Src)
4448 return SDValue();
4449
4450 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4451 SmallVector<int, 16> NewMask(Mask);
4452 for (size_t i = 0; i != NewMask.size(); ++i) {
4453 if (NewMask[i] == -1)
4454 continue;
4455
4456 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4457 NewMask[i] = NewMask[i] + V1IndexOffset;
4458 } else {
4459 // Minus NewMask.size() is needed. Otherwise, the b case would be
4460 // <5,6,7,12> instead of <5,6,7,8>.
4461 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4462 }
4463 }
4464
4465 // First index must be known and non-zero. It will be used as the slidedown
4466 // amount.
4467 if (NewMask[0] <= 0)
4468 return SDValue();
4469
4470 // NewMask is also continuous.
4471 for (unsigned i = 1; i != NewMask.size(); ++i)
4472 if (NewMask[i - 1] + 1 != NewMask[i])
4473 return SDValue();
4474
4475 MVT XLenVT = Subtarget.getXLenVT();
4476 MVT SrcVT = Src.getSimpleValueType();
4477 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4478 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4479 SDValue Slidedown =
4480 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4481 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4482 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4483 return DAG.getNode(
4484 ISD::EXTRACT_SUBVECTOR, DL, VT,
4485 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4486 DAG.getConstant(0, DL, XLenVT));
4487}
4488
4489// Because vslideup leaves the destination elements at the start intact, we can
4490// use it to perform shuffles that insert subvectors:
4491//
4492// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4493// ->
4494// vsetvli zero, 8, e8, mf2, ta, ma
4495// vslideup.vi v8, v9, 4
4496//
4497// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4498// ->
4499// vsetvli zero, 5, e8, mf2, tu, ma
4500// vslideup.vi v8, v9, 2
4501static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4502 SDValue V1, SDValue V2,
4503 ArrayRef<int> Mask,
4504 const RISCVSubtarget &Subtarget,
4505 SelectionDAG &DAG) {
4506 unsigned NumElts = VT.getVectorNumElements();
4507 int NumSubElts, Index;
4508 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4509 Index))
4510 return SDValue();
4511
4512 bool OpsSwapped = Mask[Index] < (int)NumElts;
4513 SDValue InPlace = OpsSwapped ? V2 : V1;
4514 SDValue ToInsert = OpsSwapped ? V1 : V2;
4515
4516 MVT XLenVT = Subtarget.getXLenVT();
4517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4518 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4519 // We slide up by the index that the subvector is being inserted at, and set
4520 // VL to the index + the number of elements being inserted.
4521 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4522 // If we're adding a suffix to the in place vector, i.e. inserting right
4523 // up to the very end of it, then we don't actually care about the tail.
4524 if (NumSubElts + Index >= (int)NumElts)
4525 Policy |= RISCVII::TAIL_AGNOSTIC;
4526
4527 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4528 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4529 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4530
4531 SDValue Res;
4532 // If we're inserting into the lowest elements, use a tail undisturbed
4533 // vmv.v.v.
4534 if (Index == 0)
4535 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4536 VL);
4537 else
4538 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4539 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4540 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4541}
4542
4543/// Match v(f)slide1up/down idioms. These operations involve sliding
4544/// N-1 elements to make room for an inserted scalar at one end.
4545static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4546 SDValue V1, SDValue V2,
4547 ArrayRef<int> Mask,
4548 const RISCVSubtarget &Subtarget,
4549 SelectionDAG &DAG) {
4550 bool OpsSwapped = false;
4551 if (!isa<BuildVectorSDNode>(V1)) {
4552 if (!isa<BuildVectorSDNode>(V2))
4553 return SDValue();
4554 std::swap(V1, V2);
4555 OpsSwapped = true;
4556 }
4557 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4558 if (!Splat)
4559 return SDValue();
4560
4561 // Return true if the mask could describe a slide of Mask.size() - 1
4562 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4563 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4564 const unsigned S = (Offset > 0) ? 0 : -Offset;
4565 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4566 for (unsigned i = S; i != E; ++i)
4567 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4568 return false;
4569 return true;
4570 };
4571
4572 const unsigned NumElts = VT.getVectorNumElements();
4573 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4574 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4575 return SDValue();
4576
4577 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4578 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4579 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4580 return SDValue();
4581
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4584 auto OpCode = IsVSlidedown ?
4585 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4586 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4587 if (!VT.isFloatingPoint())
4588 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4589 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4590 DAG.getUNDEF(ContainerVT),
4591 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4592 Splat, TrueMask, VL);
4593 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4594}
4595
4596// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4597// to create an interleaved vector of <[vscale x] n*2 x ty>.
4598// This requires that the size of ty is less than the subtarget's maximum ELEN.
4599static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4600 const SDLoc &DL, SelectionDAG &DAG,
4601 const RISCVSubtarget &Subtarget) {
4602 MVT VecVT = EvenV.getSimpleValueType();
4603 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4604 // Convert fixed vectors to scalable if needed
4605 if (VecContainerVT.isFixedLengthVector()) {
4606 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4607 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4608 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4609 }
4610
4611 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4612
4613 // We're working with a vector of the same size as the resulting
4614 // interleaved vector, but with half the number of elements and
4615 // twice the SEW (Hence the restriction on not using the maximum
4616 // ELEN)
4617 MVT WideVT =
4618 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4619 VecVT.getVectorElementCount());
4620 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4621 if (WideContainerVT.isFixedLengthVector())
4622 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4623
4624 // Bitcast the input vectors to integers in case they are FP
4625 VecContainerVT = VecContainerVT.changeTypeToInteger();
4626 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4627 OddV = DAG.getBitcast(VecContainerVT, OddV);
4628
4629 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4630 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4631
4632 SDValue Interleaved;
4633 if (OddV.isUndef()) {
4634 // If OddV is undef, this is a zero extend.
4635 // FIXME: Not only does this optimize the code, it fixes some correctness
4636 // issues because MIR does not have freeze.
4637 Interleaved =
4638 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4639 } else if (Subtarget.hasStdExtZvbb()) {
4640 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4641 SDValue OffsetVec =
4642 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4643 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4644 OffsetVec, Passthru, Mask, VL);
4645 if (!EvenV.isUndef())
4646 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4647 Interleaved, EvenV, Passthru, Mask, VL);
4648 } else if (EvenV.isUndef()) {
4649 Interleaved =
4650 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4651
4652 SDValue OffsetVec =
4653 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4654 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4655 Interleaved, OffsetVec, Passthru, Mask, VL);
4656 } else {
4657 // FIXME: We should freeze the odd vector here. We already handled the case
4658 // of provably undef/poison above.
4659
4660 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4661 // vwaddu.vv
4662 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4663 OddV, Passthru, Mask, VL);
4664
4665 // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1)
4666 SDValue AllOnesVec = DAG.getSplatVector(
4667 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4668 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4669 OddV, AllOnesVec, Passthru, Mask, VL);
4670
4671 // Add the two together so we get
4672 // (OddV * 0xff...ff) + (OddV + EvenV)
4673 // = (OddV * 0x100...00) + EvenV
4674 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4675 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4676 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4677 Interleaved, OddsMul, Passthru, Mask, VL);
4678 }
4679
4680 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4681 MVT ResultContainerVT = MVT::getVectorVT(
4682 VecVT.getVectorElementType(), // Make sure to use original type
4683 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4684 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4685
4686 // Convert back to a fixed vector if needed
4687 MVT ResultVT =
4688 MVT::getVectorVT(VecVT.getVectorElementType(),
4689 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4690 if (ResultVT.isFixedLengthVector())
4691 Interleaved =
4692 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4693
4694 return Interleaved;
4695}
4696
4697// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4698// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
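// Illustrative note: when the mask does not fill the wider element exactly,
// e.g. reversing a v4i1, ViaEltSize is 8, so the bits are inserted into a
// v8i1, bitreversed as a single i8 (placing the payload in the top four
// bits), shifted right by 8 - 4 = 4, and the low v4i1 is extracted again.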
4699static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4700 SelectionDAG &DAG,
4701 const RISCVSubtarget &Subtarget) {
4702 SDLoc DL(SVN);
4703 MVT VT = SVN->getSimpleValueType(0);
4704 SDValue V = SVN->getOperand(0);
4705 unsigned NumElts = VT.getVectorNumElements();
4706
4707 assert(VT.getVectorElementType() == MVT::i1);
4708
4709 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4710 SVN->getMask().size()) ||
4711 !SVN->getOperand(1).isUndef())
4712 return SDValue();
4713
4714 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4715 EVT ViaVT = EVT::getVectorVT(
4716 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4717 EVT ViaBitVT =
4718 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4719
4720 // If we don't have zvbb or the larger element type > ELEN, the operation will
4721 // be illegal.
4722 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4723 ViaVT) ||
4724 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4725 return SDValue();
4726
4727 // If the bit vector doesn't fit exactly into the larger element type, we need
4728 // to insert it into the larger vector and then shift up the reversed bits
4729 // afterwards to get rid of the gap introduced.
4730 if (ViaEltSize > NumElts)
4731 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4732 V, DAG.getVectorIdxConstant(0, DL));
4733
4734 SDValue Res =
4735 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4736
4737 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4738 // element type.
4739 if (ViaEltSize > NumElts)
4740 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4741 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4742
4743 Res = DAG.getBitcast(ViaBitVT, Res);
4744
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4747 DAG.getVectorIdxConstant(0, DL));
4748 return Res;
4749}
4750
4751static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4752 SelectionDAG &DAG,
4753 const RISCVSubtarget &Subtarget,
4754 MVT &RotateVT, unsigned &RotateAmt) {
4755 SDLoc DL(SVN);
4756
4757 EVT VT = SVN->getValueType(0);
4758 unsigned NumElts = VT.getVectorNumElements();
4759 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4760 unsigned NumSubElts;
4761 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4762 NumElts, NumSubElts, RotateAmt))
4763 return false;
4764 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4765 NumElts / NumSubElts);
4766
4767 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4768 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4769}
4770
4771// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4772// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4773// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
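// Illustrative note: a v8i8 shuffle with mask <1, 0, 3, 2, 5, 4, 7, 6> swaps
// adjacent bytes, i.e. rotates every 16-bit lane by 8, and is matched with
// RotateVT = v4i16 and RotateAmt = 8; since a 16-bit rotate by 8 is a byte
// swap, it is emitted as ISD::BSWAP (vrev8 when available).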
4774static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4775 SelectionDAG &DAG,
4776 const RISCVSubtarget &Subtarget) {
4777 SDLoc DL(SVN);
4778
4779 EVT VT = SVN->getValueType(0);
4780 unsigned RotateAmt;
4781 MVT RotateVT;
4782 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4783 return SDValue();
4784
4785 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4786
4787 SDValue Rotate;
4788 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4789 // so canonicalize to vrev8.
4790 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4791 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4792 else
4793 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4794 DAG.getConstant(RotateAmt, DL, RotateVT));
4795
4796 return DAG.getBitcast(VT, Rotate);
4797}
4798
4799// If compiling with an exactly known VLEN, see if we can split a
4800// shuffle on m2 or larger into a small number of m1 sized shuffles
4801// which write each destination registers exactly once.
4802static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4803 SelectionDAG &DAG,
4804 const RISCVSubtarget &Subtarget) {
4805 SDLoc DL(SVN);
4806 MVT VT = SVN->getSimpleValueType(0);
4807 SDValue V1 = SVN->getOperand(0);
4808 SDValue V2 = SVN->getOperand(1);
4809 ArrayRef<int> Mask = SVN->getMask();
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 // If we don't know exact data layout, not much we can do. If this
4813 // is already m1 or smaller, no point in splitting further.
4814 const auto VLen = Subtarget.getRealVLen();
4815 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4816 return SDValue();
4817
4818 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4819 // expansion for.
4820 unsigned RotateAmt;
4821 MVT RotateVT;
4822 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4823 return SDValue();
4824
4825 MVT ElemVT = VT.getVectorElementType();
4826 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4827 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4828
4829 SmallVector<std::pair<int, SmallVector<int>>>
4830 OutMasks(VRegsPerSrc, {-1, {}});
4831
4832 // Check if our mask can be done as a 1-to-1 mapping from source
4833 // to destination registers in the group without needing to
4834 // write each destination more than once.
4835 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4836 int DstVecIdx = DstIdx / ElemsPerVReg;
4837 int DstSubIdx = DstIdx % ElemsPerVReg;
4838 int SrcIdx = Mask[DstIdx];
4839 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4840 continue;
4841 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4842 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4843 if (OutMasks[DstVecIdx].first == -1)
4844 OutMasks[DstVecIdx].first = SrcVecIdx;
4845 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4846 // Note: This case could easily be handled by keeping track of a chain
4847 // of source values and generating two element shuffles below. This is
4848 // less an implementation question, and more a profitability one.
4849 return SDValue();
4850
4851 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4852 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4853 }
4854
4855 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4856 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4857 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4858 assert(M1VT == getLMUL1VT(M1VT));
4859 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4860 SDValue Vec = DAG.getUNDEF(ContainerVT);
4861 // The following semantically builds up a fixed length concat_vector
4862 // of the component shuffle_vectors. We eagerly lower to scalable here
4863 // to avoid DAG combining it back to a large shuffle_vector again.
4864 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4865 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4866 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4867 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4868 if (SrcVecIdx == -1)
4869 continue;
4870 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4871 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4872 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4873 DAG.getVectorIdxConstant(ExtractIdx, DL));
4874 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4875 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4876 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4877 unsigned InsertIdx = DstVecIdx * NumOpElts;
4878 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4879 DAG.getVectorIdxConstant(InsertIdx, DL));
4880 }
4881 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4882}
4883
4884static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4885 const RISCVSubtarget &Subtarget) {
4886 SDValue V1 = Op.getOperand(0);
4887 SDValue V2 = Op.getOperand(1);
4888 SDLoc DL(Op);
4889 MVT XLenVT = Subtarget.getXLenVT();
4890 MVT VT = Op.getSimpleValueType();
4891 unsigned NumElts = VT.getVectorNumElements();
4892 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4893
4894 if (VT.getVectorElementType() == MVT::i1) {
4895 // Lower to a vror.vi of a larger element type if possible before we promote
4896 // i1s to i8s.
4897 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4898 return V;
4899 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4900 return V;
4901
4902 // Promote i1 shuffle to i8 shuffle.
4903 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4904 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4905 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4906 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4907 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4908 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4909 ISD::SETNE);
4910 }
4911
4912 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4913
4914 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4915
4916 if (SVN->isSplat()) {
4917 const int Lane = SVN->getSplatIndex();
4918 if (Lane >= 0) {
4919 MVT SVT = VT.getVectorElementType();
4920
4921 // Turn splatted vector load into a strided load with an X0 stride.
4922 SDValue V = V1;
4923 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4924 // with undef.
4925 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4926 int Offset = Lane;
4927 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4928 int OpElements =
4929 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4930 V = V.getOperand(Offset / OpElements);
4931 Offset %= OpElements;
4932 }
4933
4934 // We need to ensure the load isn't atomic or volatile.
4935 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4936 auto *Ld = cast<LoadSDNode>(V);
4937 Offset *= SVT.getStoreSize();
4938 SDValue NewAddr = DAG.getMemBasePlusOffset(
4939 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4940
4941 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4942 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4943 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4944 SDValue IntID =
4945 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4946 SDValue Ops[] = {Ld->getChain(),
4947 IntID,
4948 DAG.getUNDEF(ContainerVT),
4949 NewAddr,
4950 DAG.getRegister(RISCV::X0, XLenVT),
4951 VL};
4952 SDValue NewLoad = DAG.getMemIntrinsicNode(
4953 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4954 DAG.getMachineFunction().getMachineMemOperand(
4955 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4956 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4957 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4958 }
4959
4960 // Otherwise use a scalar load and splat. This will give the best
4961 // opportunity to fold a splat into the operation. ISel can turn it into
4962 // the x0 strided load if we aren't able to fold away the select.
4963 if (SVT.isFloatingPoint())
4964 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4965 Ld->getPointerInfo().getWithOffset(Offset),
4966 Ld->getOriginalAlign(),
4967 Ld->getMemOperand()->getFlags());
4968 else
4969 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4970 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4971 Ld->getOriginalAlign(),
4972 Ld->getMemOperand()->getFlags());
4973 DAG.makeEquivalentMemoryOrdering(Ld, V);
4974
4975 unsigned Opc =
4976 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4977 SDValue Splat =
4978 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4979 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4980 }
4981
4982 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4983 assert(Lane < (int)NumElts && "Unexpected lane!");
4984 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4985 V1, DAG.getConstant(Lane, DL, XLenVT),
4986 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4987 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4988 }
4989 }
4990
4991 // For exact VLEN m2 or greater, try to split to m1 operations if we
4992 // can split cleanly.
4993 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4994 return V;
4995
4996 ArrayRef<int> Mask = SVN->getMask();
4997
4998 if (SDValue V =
4999 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5000 return V;
5001
5002 if (SDValue V =
5003 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5004 return V;
5005
5006 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5007 // available.
5008 if (Subtarget.hasStdExtZvkb())
5009 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5010 return V;
5011
5012 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5013 // be undef which can be handled with a single SLIDEDOWN/UP.
5014 int LoSrc, HiSrc;
5015 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5016 if (Rotation > 0) {
5017 SDValue LoV, HiV;
5018 if (LoSrc >= 0) {
5019 LoV = LoSrc == 0 ? V1 : V2;
5020 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5021 }
5022 if (HiSrc >= 0) {
5023 HiV = HiSrc == 0 ? V1 : V2;
5024 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5025 }
5026
5027 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5028 // to slide LoV up by (NumElts - Rotation).
5029 unsigned InvRotate = NumElts - Rotation;
5030
5031 SDValue Res = DAG.getUNDEF(ContainerVT);
5032 if (HiV) {
5033 // Even though we could use a smaller VL, don't to avoid a vsetivli
5034 // toggle.
5035 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5036 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5037 }
5038 if (LoV)
5039 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5040 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5041 RISCVII::TAIL_AGNOSTIC);
5042
5043 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5044 }
5045
5046 // If this is a deinterleave and we can widen the vector, then we can use
5047 // vnsrl to deinterleave.
5048 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5049 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5050 Subtarget, DAG);
5051 }
5052
5053 if (SDValue V =
5054 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5055 return V;
5056
5057 // Detect an interleave shuffle and lower to
5058 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5059 int EvenSrc, OddSrc;
5060 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5061 // Extract the halves of the vectors.
5062 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5063
5064 int Size = Mask.size();
5065 SDValue EvenV, OddV;
5066 assert(EvenSrc >= 0 && "Undef source?");
5067 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5068 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5069 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5070
5071 assert(OddSrc >= 0 && "Undef source?");
5072 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5073 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5074 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5075
5076 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5077 }
5078
5079
5080 // Handle any remaining single source shuffles
5081 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5082 if (V2.isUndef()) {
5083 // We might be able to express the shuffle as a bitrotate. But even if we
5084 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5085 // shifts and a vor will have a higher throughput than a vrgather.
5086 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5087 return V;
5088
5089 if (VT.getScalarSizeInBits() == 8 &&
5090 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5091 // On such a vector we're unable to use i8 as the index type.
5092 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5093 // may involve vector splitting if we're already at LMUL=8, or our
5094 // user-supplied maximum fixed-length LMUL.
5095 return SDValue();
5096 }
5097
5098 // Base case for the two operand recursion below - handle the worst case
5099 // single source shuffle.
5100 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5101 MVT IndexVT = VT.changeTypeToInteger();
5102 // Since we can't introduce illegal index types at this stage, use i16 and
5103 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5104 // than XLenVT.
5105 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5106 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5107 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5108 }
5109
5110 // If the mask allows, we can do all the index computation in 16 bits. This
5111 // requires less work and less register pressure at high LMUL, and creates
5112 // smaller constants which may be cheaper to materialize.
5113 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5114 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5115 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5116 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5117 }
5118
5119 MVT IndexContainerVT =
5120 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5121
5122 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5123 SmallVector<SDValue> GatherIndicesLHS;
5124 for (int MaskIndex : Mask) {
5125 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5126 GatherIndicesLHS.push_back(IsLHSIndex
5127 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5128 : DAG.getUNDEF(XLenVT));
5129 }
5130 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5131 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5132 Subtarget);
5133 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5134 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5135 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5136 }
5137
5138 // By default we preserve the original operand order, and use a mask to
5139 // select LHS as true and RHS as false. However, since RVV vector selects may
5140 // feature splats but only on the LHS, we may choose to invert our mask and
5141 // instead select between RHS and LHS.
5142 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5143
5144 // Detect shuffles which can be re-expressed as vector selects; these are
5145 // shuffles in which each element in the destination is taken from an element
5146 // at the corresponding index in either source vectors.
5147 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5148 int MaskIndex = MaskIdx.value();
5149 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5150 });
5151 if (IsSelect) {
5152 // Now construct the mask that will be used by the vselect operation.
5153 SmallVector<SDValue> MaskVals;
5154 for (int MaskIndex : Mask) {
5155 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5156 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5157 }
5158
5159 if (SwapOps)
5160 std::swap(V1, V2);
5161
5162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5164 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5165 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5166 }
5167
5168 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5169 // merged with a second vrgather.
5170 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5171 SmallVector<SDValue> MaskVals;
5172
5173 // Now construct the mask that will be used by the blended vrgather operation.
5174 // Construct the appropriate indices into each vector.
5175 for (int MaskIndex : Mask) {
5176 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5177 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5178 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5179 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5180 ? MaskIndex : -1);
5181 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5182 }
5183
5184 if (SwapOps) {
5185 std::swap(V1, V2);
5186 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5187 }
5188
5189 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5190 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5191 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5192
5193 // Recursively invoke lowering for each operand if we had two
5194 // independent single source shuffles, and then combine the result via a
5195 // vselect. Note that the vselect will likely be folded back into the
5196 // second permute (vrgather, or other) by the post-isel combine.
5197 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5198 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5199 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5200}
5201
5202bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5203 // Support splats for any type. These should type legalize well.
5204 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5205 return true;
5206
5207 // Only support legal VTs for other shuffles for now.
5208 if (!isTypeLegal(VT))
5209 return false;
5210
5211 MVT SVT = VT.getSimpleVT();
5212
5213 // Not for i1 vectors.
5214 if (SVT.getScalarType() == MVT::i1)
5215 return false;
5216
5217 int Dummy1, Dummy2;
5218 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5219 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5220}
5221
5222// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5223// the exponent.
5224SDValue
5225RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 MVT VT = Op.getSimpleValueType();
5228 unsigned EltSize = VT.getScalarSizeInBits();
5229 SDValue Src = Op.getOperand(0);
5230 SDLoc DL(Op);
5231 MVT ContainerVT = VT;
5232
5233 SDValue Mask, VL;
5234 if (Op->isVPOpcode()) {
5235 Mask = Op.getOperand(1);
5236 if (VT.isFixedLengthVector())
5237 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5238 Subtarget);
5239 VL = Op.getOperand(2);
5240 }
5241
5242 // We choose an FP type that can represent the value exactly if possible.
5243 // Otherwise, we use a round-towards-zero conversion so the exponent of the result is still correct.
5244 // TODO: Use f16 for i8 when possible?
5245 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5246 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5247 FloatEltVT = MVT::f32;
5248 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5249
5250 // Legal types should have been checked in the RISCVTargetLowering
5251 // constructor.
5252 // TODO: Splitting may make sense in some cases.
5253 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5254 "Expected legal float type!");
5255
5256 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5257 // The trailing zero count is equal to log2 of this single bit value.
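  // For example, with X = 0b01101000, X & -X = 0b00001000, and log2 of that
  // single set bit is 3, which is cttz(X).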
5258 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5259 SDValue Neg = DAG.getNegative(Src, DL, VT);
5260 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5261 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5263 Src, Mask, VL);
5264 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5265 }
5266
5267 // We have a legal FP type, convert to it.
5268 SDValue FloatVal;
5269 if (FloatVT.bitsGT(VT)) {
5270 if (Op->isVPOpcode())
5271 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5272 else
5273 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5274 } else {
5275 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(VT);
5278 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5279 }
5280 if (!Op->isVPOpcode())
5281 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5282 SDValue RTZRM =
5283 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5284 MVT ContainerFloatVT =
5285 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5286 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5287 Src, Mask, RTZRM, VL);
5288 if (VT.isFixedLengthVector())
5289 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5290 }
5291 // Bitcast to integer and shift the exponent to the LSB.
5292 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5293 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5294 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5295
5296 SDValue Exp;
5297 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5298 if (Op->isVPOpcode()) {
5299 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5300 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5301 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5302 } else {
5303 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5304 DAG.getConstant(ShiftAmt, DL, IntVT));
5305 if (IntVT.bitsLT(VT))
5306 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5307 else if (IntVT.bitsGT(VT))
5308 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5309 }
5310
5311 // The exponent contains log2 of the value in biased form.
5312 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5313 // For trailing zeros, we just need to subtract the bias.
5314 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5315 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5316 DAG.getConstant(ExponentBias, DL, VT));
5317 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5319 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5320
5321 // For leading zeros, we need to remove the bias and convert from log2 to
5322 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
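  // For example, for an i32 element with value 8, the converted FP value has a
  // biased exponent of Bias + 3, so (Bias + 31) - (Bias + 3) = 28 == ctlz(8).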
5323 unsigned Adjust = ExponentBias + (EltSize - 1);
5324 SDValue Res;
5325 if (Op->isVPOpcode())
5326 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5327 Mask, VL);
5328 else
5329 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5330
5331 // With a zero input, the result above equals Adjust, which is greater than
5332 // EltSize. Hence, we can compute min(Res, EltSize) for CTLZ.
5333 if (Op.getOpcode() == ISD::CTLZ)
5334 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5335 else if (Op.getOpcode() == ISD::VP_CTLZ)
5336 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5337 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5338 return Res;
5339}
5340
5341// While RVV has alignment restrictions, we should always be able to load as a
5342// legal equivalently-sized byte-typed vector instead. This method is
5343// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5344// the load is already correctly-aligned, it returns SDValue().
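// For example, an underaligned load of <4 x i32> is re-expressed here as a
// load of <16 x i8> followed by a bitcast back to <4 x i32>.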
5345SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 auto *Load = cast<LoadSDNode>(Op);
5348 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5349
5350 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5351 Load->getMemoryVT(),
5352 *Load->getMemOperand()))
5353 return SDValue();
5354
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5357 unsigned EltSizeBits = VT.getScalarSizeInBits();
5358 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5359 "Unexpected unaligned RVV load type");
5360 MVT NewVT =
5361 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5362 assert(NewVT.isValid() &&
5363 "Expecting equally-sized RVV vector types to be legal");
5364 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5365 Load->getPointerInfo(), Load->getOriginalAlign(),
5366 Load->getMemOperand()->getFlags());
5367 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5368}
5369
5370// While RVV has alignment restrictions, we should always be able to store as a
5371// legal equivalently-sized byte-typed vector instead. This method is
5372// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5373// returns SDValue() if the store is already correctly aligned.
5374SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 auto *Store = cast<StoreSDNode>(Op);
5377 assert(Store && Store->getValue().getValueType().isVector() &&
5378 "Expected vector store");
5379
5380 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5381 Store->getMemoryVT(),
5382 *Store->getMemOperand()))
5383 return SDValue();
5384
5385 SDLoc DL(Op);
5386 SDValue StoredVal = Store->getValue();
5387 MVT VT = StoredVal.getSimpleValueType();
5388 unsigned EltSizeBits = VT.getScalarSizeInBits();
5389 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5390 "Unexpected unaligned RVV store type");
5391 MVT NewVT =
5392 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5393 assert(NewVT.isValid() &&
5394 "Expecting equally-sized RVV vector types to be legal");
5395 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5396 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5397 Store->getPointerInfo(), Store->getOriginalAlign(),
5398 Store->getMemOperand()->getFlags());
5399}
5400
5401static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5402 const RISCVSubtarget &Subtarget) {
5403 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5404
5405 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5406
5407 // All simm32 constants should be handled by isel.
5408 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5409 // this check redundant, but small immediates are common so this check
5410 // should have better compile time.
5411 if (isInt<32>(Imm))
5412 return Op;
5413
5414 // We only need to cost the immediate, if constant pool lowering is enabled.
5415 if (!Subtarget.useConstantPoolForLargeInts())
5416 return Op;
5417
5419 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5420 return Op;
5421
5422 // Optimizations below are disabled for opt size. If we're optimizing for
5423 // size, use a constant pool.
5424 if (DAG.shouldOptForSize())
5425 return SDValue();
5426
5427 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5428 // do that if it will avoid a constant pool.
5429 // It will require an extra temporary register though.
5430 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5431 // low and high 32 bits are the same and bit 31 and 63 are set.
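  // For example, 0x0101010101010101 can be built as X + (X << 32) with
  // X = 0x01010101, avoiding a constant pool access.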
5432 unsigned ShiftAmt, AddOpc;
5433 RISCVMatInt::InstSeq SeqLo =
5434 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5435 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5436 return Op;
5437
5438 return SDValue();
5439}
5440
5441static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5442 const RISCVSubtarget &Subtarget) {
5443 SDLoc dl(Op);
5444 AtomicOrdering FenceOrdering =
5445 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5446 SyncScope::ID FenceSSID =
5447 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5448
5449 if (Subtarget.hasStdExtZtso()) {
5450 // The only fence that needs an instruction is a sequentially-consistent
5451 // cross-thread fence.
5452 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5453 FenceSSID == SyncScope::System)
5454 return Op;
5455
5456 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5457 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5458 }
5459
5460 // singlethread fences only synchronize with signal handlers on the same
5461 // thread and thus only need to preserve instruction order, not actually
5462 // enforce memory ordering.
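  // For example, a seq_cst fence with syncscope("singlethread") lowers to the
  // MEMBARRIER below and therefore emits no machine instruction.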
5463 if (FenceSSID == SyncScope::SingleThread)
5464 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5465 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5466
5467 return Op;
5468}
5469
5470static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5471 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5472 "Unexpected custom legalisation");
5473
5474 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
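  // For example, saddsat(INT32_MAX, 1) produces an i64 sum of 0x80000000,
  // which the smin below clamps back to INT32_MAX before truncation.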
5475 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5476 SDLoc DL(Op);
5477 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5478 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5479 SDValue Result =
5480 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5481
5482 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5483 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5484 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5485 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5486 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5487 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5488 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5489}
5490
5491static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5492 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5493 "Unexpected custom legalisation");
5494
5495 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5496 // sign extend allows overflow of the lower 32 bits to be detected on
5497 // the promoted size.
5498 SDLoc DL(Op);
5499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5501 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5503}
5504
5505// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
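// For example, for 0x7fffffff + 1 the widened i64 sum is 0x80000000, but
// sign-extending its low 32 bits gives 0xffffffff80000000; the mismatch
// below signals overflow.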
5506static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5507 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5508 "Unexpected custom legalisation");
5509 if (isa<ConstantSDNode>(Op.getOperand(1)))
5510 return SDValue();
5511
5512 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5513 SDLoc DL(Op);
5514 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5515 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5516 SDValue WideOp =
5517 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5518 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5519 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5520 DAG.getValueType(MVT::i32));
5521 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5522 ISD::SETNE);
5523 return DAG.getMergeValues({Res, Ovf}, DL);
5524}
5525
5526// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5527static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5528 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5529 "Unexpected custom legalisation");
5530 SDLoc DL(Op);
5531 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5532 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5533 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5534 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5535 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5536 DAG.getValueType(MVT::i32));
5537 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5538 ISD::SETNE);
5539 return DAG.getMergeValues({Res, Ovf}, DL);
5540}
5541
5542SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5543 SelectionDAG &DAG) const {
5544 SDLoc DL(Op);
5545 MVT VT = Op.getSimpleValueType();
5546 MVT XLenVT = Subtarget.getXLenVT();
5547 unsigned Check = Op.getConstantOperandVal(1);
5548 unsigned TDCMask = 0;
5549 if (Check & fcSNan)
5550 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5551 if (Check & fcQNan)
5552 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5553 if (Check & fcPosInf)
5554 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5555 if (Check & fcNegInf)
5556 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5557 if (Check & fcPosNormal)
5558 TDCMask |= RISCV::FPMASK_Positive_Normal;
5559 if (Check & fcNegNormal)
5560 TDCMask |= RISCV::FPMASK_Negative_Normal;
5561 if (Check & fcPosSubnormal)
5562 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5563 if (Check & fcNegSubnormal)
5564 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5565 if (Check & fcPosZero)
5566 TDCMask |= RISCV::FPMASK_Positive_Zero;
5567 if (Check & fcNegZero)
5568 TDCMask |= RISCV::FPMASK_Negative_Zero;
5569
5570 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5571
5572 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5573
5574 if (VT.isVector()) {
5575 SDValue Op0 = Op.getOperand(0);
5576 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5577
5578 if (VT.isScalableVector()) {
5579 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5580 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5581 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5582 Mask = Op.getOperand(2);
5583 VL = Op.getOperand(3);
5584 }
5585 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5586 VL, Op->getFlags());
5587 if (IsOneBitMask)
5588 return DAG.getSetCC(DL, VT, FPCLASS,
5589 DAG.getConstant(TDCMask, DL, DstVT),
5590 ISD::SETEQ);
5591 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5592 DAG.getConstant(TDCMask, DL, DstVT));
5593 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5594 ISD::SETNE);
5595 }
5596
5597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5600 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5601 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5602 Mask = Op.getOperand(2);
5603 MVT MaskContainerVT =
5604 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5605 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5606 VL = Op.getOperand(3);
5607 }
5608 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5609
5610 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5611 Mask, VL, Op->getFlags());
5612
5613 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5614 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5615 if (IsOneBitMask) {
5616 SDValue VMSEQ =
5617 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5618 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5619 DAG.getUNDEF(ContainerVT), Mask, VL});
5620 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5621 }
5622 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5623 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5624
5625 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5626 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5627 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5628
5629 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5630 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5631 DAG.getUNDEF(ContainerVT), Mask, VL});
5632 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5633 }
5634
5635 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5636 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5637 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5638 ISD::SETNE);
5639 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5640}
5641
5642// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5643// operations propagate nans.
5645 const RISCVSubtarget &Subtarget) {
5646 SDLoc DL(Op);
5647 MVT VT = Op.getSimpleValueType();
5648
5649 SDValue X = Op.getOperand(0);
5650 SDValue Y = Op.getOperand(1);
5651
5652 if (!VT.isVector()) {
5653 MVT XLenVT = Subtarget.getXLenVT();
5654
5655 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5656 // ensures that when one input is a nan, the other will also be a nan
5657 // allowing the nan to propagate. If both inputs are nan, this will swap the
5658 // inputs which is harmless.
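  // For example, for fmaximum(NaN, 1.0): X fails the ordered X == X check, so
  // NewY becomes X (NaN), and fmax then propagates the NaN as required.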
5659
5660 SDValue NewY = Y;
5661 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5662 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5663 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5664 }
5665
5666 SDValue NewX = X;
5667 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5668 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5669 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5670 }
5671
5672 unsigned Opc =
5673 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5674 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5675 }
5676
5677 // Check for NaNs before converting the fixed-length vectors to scalable ones.
5678 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5679 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5680
5681 MVT ContainerVT = VT;
5682 if (VT.isFixedLengthVector()) {
5683 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5685 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5686 }
5687
5688 SDValue Mask, VL;
5689 if (Op->isVPOpcode()) {
5690 Mask = Op.getOperand(2);
5691 if (VT.isFixedLengthVector())
5692 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5693 Subtarget);
5694 VL = Op.getOperand(3);
5695 } else {
5696 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5697 }
5698
5699 SDValue NewY = Y;
5700 if (!XIsNeverNan) {
5701 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5702 {X, X, DAG.getCondCode(ISD::SETOEQ),
5703 DAG.getUNDEF(ContainerVT), Mask, VL});
5704 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5705 DAG.getUNDEF(ContainerVT), VL);
5706 }
5707
5708 SDValue NewX = X;
5709 if (!YIsNeverNan) {
5710 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5711 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5712 DAG.getUNDEF(ContainerVT), Mask, VL});
5713 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5714 DAG.getUNDEF(ContainerVT), VL);
5715 }
5716
5717 unsigned Opc =
5718 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5719 ? RISCVISD::VFMAX_VL
5720 : RISCVISD::VFMIN_VL;
5721 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5722 DAG.getUNDEF(ContainerVT), Mask, VL);
5723 if (VT.isFixedLengthVector())
5724 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5725 return Res;
5726}
5727
5728/// Get a RISC-V target specified VL op for a given SDNode.
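/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL via the
/// OP_CASE/VP_CASE macros below.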
5729static unsigned getRISCVVLOp(SDValue Op) {
5730#define OP_CASE(NODE) \
5731 case ISD::NODE: \
5732 return RISCVISD::NODE##_VL;
5733#define VP_CASE(NODE) \
5734 case ISD::VP_##NODE: \
5735 return RISCVISD::NODE##_VL;
5736 // clang-format off
5737 switch (Op.getOpcode()) {
5738 default:
5739 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5740 OP_CASE(ADD)
5741 OP_CASE(SUB)
5742 OP_CASE(MUL)
5743 OP_CASE(MULHS)
5744 OP_CASE(MULHU)
5745 OP_CASE(SDIV)
5746 OP_CASE(SREM)
5747 OP_CASE(UDIV)
5748 OP_CASE(UREM)
5749 OP_CASE(SHL)
5750 OP_CASE(SRA)
5751 OP_CASE(SRL)
5752 OP_CASE(ROTL)
5753 OP_CASE(ROTR)
5754 OP_CASE(BSWAP)
5755 OP_CASE(CTTZ)
5756 OP_CASE(CTLZ)
5757 OP_CASE(CTPOP)
5758 OP_CASE(BITREVERSE)
5759 OP_CASE(SADDSAT)
5760 OP_CASE(UADDSAT)
5761 OP_CASE(SSUBSAT)
5762 OP_CASE(USUBSAT)
5763 OP_CASE(AVGFLOORU)
5764 OP_CASE(AVGCEILU)
5765 OP_CASE(FADD)
5766 OP_CASE(FSUB)
5767 OP_CASE(FMUL)
5768 OP_CASE(FDIV)
5769 OP_CASE(FNEG)
5770 OP_CASE(FABS)
5771 OP_CASE(FSQRT)
5772 OP_CASE(SMIN)
5773 OP_CASE(SMAX)
5774 OP_CASE(UMIN)
5775 OP_CASE(UMAX)
5776 OP_CASE(STRICT_FADD)
5777 OP_CASE(STRICT_FSUB)
5778 OP_CASE(STRICT_FMUL)
5779 OP_CASE(STRICT_FDIV)
5780 OP_CASE(STRICT_FSQRT)
5781 VP_CASE(ADD) // VP_ADD
5782 VP_CASE(SUB) // VP_SUB
5783 VP_CASE(MUL) // VP_MUL
5784 VP_CASE(SDIV) // VP_SDIV
5785 VP_CASE(SREM) // VP_SREM
5786 VP_CASE(UDIV) // VP_UDIV
5787 VP_CASE(UREM) // VP_UREM
5788 VP_CASE(SHL) // VP_SHL
5789 VP_CASE(FADD) // VP_FADD
5790 VP_CASE(FSUB) // VP_FSUB
5791 VP_CASE(FMUL) // VP_FMUL
5792 VP_CASE(FDIV) // VP_FDIV
5793 VP_CASE(FNEG) // VP_FNEG
5794 VP_CASE(FABS) // VP_FABS
5795 VP_CASE(SMIN) // VP_SMIN
5796 VP_CASE(SMAX) // VP_SMAX
5797 VP_CASE(UMIN) // VP_UMIN
5798 VP_CASE(UMAX) // VP_UMAX
5799 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5800 VP_CASE(SETCC) // VP_SETCC
5801 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5802 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5803 VP_CASE(BITREVERSE) // VP_BITREVERSE
5804 VP_CASE(SADDSAT) // VP_SADDSAT
5805 VP_CASE(UADDSAT) // VP_UADDSAT
5806 VP_CASE(SSUBSAT) // VP_SSUBSAT
5807 VP_CASE(USUBSAT) // VP_USUBSAT
5808 VP_CASE(BSWAP) // VP_BSWAP
5809 VP_CASE(CTLZ) // VP_CTLZ
5810 VP_CASE(CTTZ) // VP_CTTZ
5811 VP_CASE(CTPOP) // VP_CTPOP
5812 case ISD::CTLZ_ZERO_UNDEF:
5813 case ISD::VP_CTLZ_ZERO_UNDEF:
5814 return RISCVISD::CTLZ_VL;
5815 case ISD::CTTZ_ZERO_UNDEF:
5816 case ISD::VP_CTTZ_ZERO_UNDEF:
5817 return RISCVISD::CTTZ_VL;
5818 case ISD::FMA:
5819 case ISD::VP_FMA:
5820 return RISCVISD::VFMADD_VL;
5821 case ISD::STRICT_FMA:
5822 return RISCVISD::STRICT_VFMADD_VL;
5823 case ISD::AND:
5824 case ISD::VP_AND:
5825 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5826 return RISCVISD::VMAND_VL;
5827 return RISCVISD::AND_VL;
5828 case ISD::OR:
5829 case ISD::VP_OR:
5830 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5831 return RISCVISD::VMOR_VL;
5832 return RISCVISD::OR_VL;
5833 case ISD::XOR:
5834 case ISD::VP_XOR:
5835 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5836 return RISCVISD::VMXOR_VL;
5837 return RISCVISD::XOR_VL;
5838 case ISD::VP_SELECT:
5839 case ISD::VP_MERGE:
5840 return RISCVISD::VMERGE_VL;
5841 case ISD::VP_ASHR:
5842 return RISCVISD::SRA_VL;
5843 case ISD::VP_LSHR:
5844 return RISCVISD::SRL_VL;
5845 case ISD::VP_SQRT:
5846 return RISCVISD::FSQRT_VL;
5847 case ISD::VP_SIGN_EXTEND:
5848 return RISCVISD::VSEXT_VL;
5849 case ISD::VP_ZERO_EXTEND:
5850 return RISCVISD::VZEXT_VL;
5851 case ISD::VP_FP_TO_SINT:
5852 return RISCVISD::VFCVT_RTZ_X_F_VL;
5853 case ISD::VP_FP_TO_UINT:
5854 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5855 case ISD::FMINNUM:
5856 case ISD::VP_FMINNUM:
5857 return RISCVISD::VFMIN_VL;
5858 case ISD::FMAXNUM:
5859 case ISD::VP_FMAXNUM:
5860 return RISCVISD::VFMAX_VL;
5861 case ISD::LRINT:
5862 case ISD::VP_LRINT:
5863 case ISD::LLRINT:
5864 case ISD::VP_LLRINT:
5865 return RISCVISD::VFCVT_X_F_VL;
5866 }
5867 // clang-format on
5868#undef OP_CASE
5869#undef VP_CASE
5870}
5871
5872/// Return true if a RISC-V target specified op has a merge operand.
5873static bool hasMergeOp(unsigned Opcode) {
5874 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5875 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5876 "not a RISC-V target specific op");
5877 assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5878 126 &&
5879 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5880 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5881 21 &&
5882 "adding target specific op should update this function");
5883 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5884 return true;
5885 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5888 return true;
5889 if (Opcode == RISCVISD::SETCC_VL)
5890 return true;
5891 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5892 return true;
5893 if (Opcode == RISCVISD::VMERGE_VL)
5894 return true;
5895 return false;
5896}
5897
5898/// Return true if a RISC-V target specified op has a mask operand.
5899static bool hasMaskOp(unsigned Opcode) {
5900 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5901 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5902 "not a RISC-V target specific op");
5903 assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5904 126 &&
5905 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5906 ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5907 21 &&
5908 "adding target specific op should update this function");
5909 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5910 return true;
5911 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5912 return true;
5913 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5914 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5915 return true;
5916 return false;
5917}
5918
5919SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
5920 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5921 SDLoc DL(Op);
5922
5923 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5924 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5925
5926 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5927 if (!Op.getOperand(j).getValueType().isVector()) {
5928 LoOperands[j] = Op.getOperand(j);
5929 HiOperands[j] = Op.getOperand(j);
5930 continue;
5931 }
5932 std::tie(LoOperands[j], HiOperands[j]) =
5933 DAG.SplitVector(Op.getOperand(j), DL);
5934 }
5935
5936 SDValue LoRes =
5937 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5938 SDValue HiRes =
5939 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5940
5941 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5942}
5943
5944SDValue RISCVTargetLowering::SplitVPOp(SDValue Op, SelectionDAG &DAG) const {
5945 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5946 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5947 SDLoc DL(Op);
5948
5949 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5950 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5951
5952 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5953 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5954 std::tie(LoOperands[j], HiOperands[j]) =
5955 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5956 continue;
5957 }
5958 if (!Op.getOperand(j).getValueType().isVector()) {
5959 LoOperands[j] = Op.getOperand(j);
5960 HiOperands[j] = Op.getOperand(j);
5961 continue;
5962 }
5963 std::tie(LoOperands[j], HiOperands[j]) =
5964 DAG.SplitVector(Op.getOperand(j), DL);
5965 }
5966
5967 SDValue LoRes =
5968 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5969 SDValue HiRes =
5970 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5971
5972 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5973}
5974
5975SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) const {
5976 SDLoc DL(Op);
5977
5978 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5979 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5980 auto [EVLLo, EVLHi] =
5981 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5982
5983 SDValue ResLo =
5984 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5985 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5986 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5987 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5988}
5989
5990SDValue RISCVTargetLowering::SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) const {
5991
5992 assert(Op->isStrictFPOpcode());
5993
5994 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5995
5996 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5997 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5998
5999 SDLoc DL(Op);
6000
6001 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6002 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6003
6004 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6005 if (!Op.getOperand(j).getValueType().isVector()) {
6006 LoOperands[j] = Op.getOperand(j);
6007 HiOperands[j] = Op.getOperand(j);
6008 continue;
6009 }
6010 std::tie(LoOperands[j], HiOperands[j]) =
6011 DAG.SplitVector(Op.getOperand(j), DL);
6012 }
6013
6014 SDValue LoRes =
6015 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6016 HiOperands[0] = LoRes.getValue(1);
6017 SDValue HiRes =
6018 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6019
6020 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6021 LoRes.getValue(0), HiRes.getValue(0));
6022 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6023}
6024
6025SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6026 SelectionDAG &DAG) const {
6027 switch (Op.getOpcode()) {
6028 default:
6029 report_fatal_error("unimplemented operand");
6030 case ISD::ATOMIC_FENCE:
6031 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6032 case ISD::GlobalAddress:
6033 return lowerGlobalAddress(Op, DAG);
6034 case ISD::BlockAddress:
6035 return lowerBlockAddress(Op, DAG);
6036 case ISD::ConstantPool:
6037 return lowerConstantPool(Op, DAG);
6038 case ISD::JumpTable:
6039 return lowerJumpTable(Op, DAG);
6040 case ISD::GlobalTLSAddress:
6041 return lowerGlobalTLSAddress(Op, DAG);
6042 case ISD::Constant:
6043 return lowerConstant(Op, DAG, Subtarget);
6044 case ISD::SELECT:
6045 return lowerSELECT(Op, DAG);
6046 case ISD::BRCOND:
6047 return lowerBRCOND(Op, DAG);
6048 case ISD::VASTART:
6049 return lowerVASTART(Op, DAG);
6050 case ISD::FRAMEADDR:
6051 return lowerFRAMEADDR(Op, DAG);
6052 case ISD::RETURNADDR:
6053 return lowerRETURNADDR(Op, DAG);
6054 case ISD::SADDO:
6055 case ISD::SSUBO:
6056 return lowerSADDO_SSUBO(Op, DAG);
6057 case ISD::SMULO:
6058 return lowerSMULO(Op, DAG);
6059 case ISD::SHL_PARTS:
6060 return lowerShiftLeftParts(Op, DAG);
6061 case ISD::SRA_PARTS:
6062 return lowerShiftRightParts(Op, DAG, true);
6063 case ISD::SRL_PARTS:
6064 return lowerShiftRightParts(Op, DAG, false);
6065 case ISD::ROTL:
6066 case ISD::ROTR:
6067 if (Op.getValueType().isFixedLengthVector()) {
6068 assert(Subtarget.hasStdExtZvkb());
6069 return lowerToScalableOp(Op, DAG);
6070 }
6071 assert(Subtarget.hasVendorXTHeadBb() &&
6072 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6073 "Unexpected custom legalization");
6074 // XTHeadBb only supports rotate by constant.
6075 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6076 return SDValue();
6077 return Op;
6078 case ISD::BITCAST: {
6079 SDLoc DL(Op);
6080 EVT VT = Op.getValueType();
6081 SDValue Op0 = Op.getOperand(0);
6082 EVT Op0VT = Op0.getValueType();
6083 MVT XLenVT = Subtarget.getXLenVT();
6084 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6085 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6086 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6087 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6088 return FPConv;
6089 }
6090 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6091 Subtarget.hasStdExtZfbfmin()) {
6092 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6093 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6094 return FPConv;
6095 }
6096 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6097 Subtarget.hasStdExtFOrZfinx()) {
6098 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6099 SDValue FPConv =
6100 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6101 return FPConv;
6102 }
6103 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6104 SDValue Lo, Hi;
6105 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6106 SDValue RetReg =
6107 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6108 return RetReg;
6109 }
6110
6111 // Consider other scalar<->scalar casts as legal if the types are legal.
6112 // Otherwise expand them.
6113 if (!VT.isVector() && !Op0VT.isVector()) {
6114 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6115 return Op;
6116 return SDValue();
6117 }
6118
6119 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6120 "Unexpected types");
6121
6122 if (VT.isFixedLengthVector()) {
6123 // We can handle fixed length vector bitcasts with a simple replacement
6124 // in isel.
6125 if (Op0VT.isFixedLengthVector())
6126 return Op;
6127 // When bitcasting from scalar to fixed-length vector, insert the scalar
6128 // into a one-element vector of the result type, and perform a vector
6129 // bitcast.
6130 if (!Op0VT.isVector()) {
6131 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6132 if (!isTypeLegal(BVT))
6133 return SDValue();
6134 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6135 DAG.getUNDEF(BVT), Op0,
6136 DAG.getVectorIdxConstant(0, DL)));
6137 }
6138 return SDValue();
6139 }
6140 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6141 // thus: bitcast the vector to a one-element vector type whose element type
6142 // is the same as the result type, and extract the first element.
6143 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6144 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6145 if (!isTypeLegal(BVT))
6146 return SDValue();
6147 SDValue BVec = DAG.getBitcast(BVT, Op0);
6148 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6149 DAG.getVectorIdxConstant(0, DL));
6150 }
6151 return SDValue();
6152 }
6153 case ISD::INTRINSIC_WO_CHAIN:
6154 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6155 case ISD::INTRINSIC_W_CHAIN:
6156 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6157 case ISD::INTRINSIC_VOID:
6158 return LowerINTRINSIC_VOID(Op, DAG);
6159 case ISD::IS_FPCLASS:
6160 return LowerIS_FPCLASS(Op, DAG);
6161 case ISD::BITREVERSE: {
6162 MVT VT = Op.getSimpleValueType();
6163 if (VT.isFixedLengthVector()) {
6164 assert(Subtarget.hasStdExtZvbb());
6165 return lowerToScalableOp(Op, DAG);
6166 }
6167 SDLoc DL(Op);
6168 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6169 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6170 // Expand bitreverse to a bswap(rev8) followed by brev8.
6171 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6172 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6173 }
6174 case ISD::TRUNCATE:
6175 // Only custom-lower vector truncates
6176 if (!Op.getSimpleValueType().isVector())
6177 return Op;
6178 return lowerVectorTruncLike(Op, DAG);
6179 case ISD::ANY_EXTEND:
6180 case ISD::ZERO_EXTEND:
6181 if (Op.getOperand(0).getValueType().isVector() &&
6182 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6183 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6184 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6185 case ISD::SIGN_EXTEND:
6186 if (Op.getOperand(0).getValueType().isVector() &&
6187 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6188 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6189 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6190 case ISD::SPLAT_VECTOR_PARTS:
6191 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6192 case ISD::INSERT_VECTOR_ELT:
6193 return lowerINSERT_VECTOR_ELT(Op, DAG);
6194 case ISD::EXTRACT_VECTOR_ELT:
6195 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6196 case ISD::SCALAR_TO_VECTOR: {
6197 MVT VT = Op.getSimpleValueType();
6198 SDLoc DL(Op);
6199 SDValue Scalar = Op.getOperand(0);
6200 if (VT.getVectorElementType() == MVT::i1) {
6201 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6202 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6203 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6204 }
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector())
6207 ContainerVT = getContainerForFixedLengthVector(VT);
6208 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6209 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6210 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6211 DAG.getUNDEF(ContainerVT), Scalar, VL);
6212 if (VT.isFixedLengthVector())
6213 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6214 return V;
6215 }
6216 case ISD::VSCALE: {
6217 MVT XLenVT = Subtarget.getXLenVT();
6218 MVT VT = Op.getSimpleValueType();
6219 SDLoc DL(Op);
6220 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6221 // We define our scalable vector types for lmul=1 to use a 64 bit known
6222 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6223 // vscale as VLENB / 8.
6224 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6225 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6226 report_fatal_error("Support for VLEN==32 is incomplete.");
6227 // We assume VLENB is a multiple of 8. We manually choose the best shift
6228 // here because SimplifyDemandedBits isn't always able to simplify it.
6229 uint64_t Val = Op.getConstantOperandVal(0);
6230 if (isPowerOf2_64(Val)) {
6231 uint64_t Log2 = Log2_64(Val);
6232 if (Log2 < 3)
6233 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6234 DAG.getConstant(3 - Log2, DL, VT));
6235 else if (Log2 > 3)
6236 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6237 DAG.getConstant(Log2 - 3, DL, XLenVT));
6238 } else if ((Val % 8) == 0) {
6239 // If the multiplier is a multiple of 8, scale it down to avoid needing
6240 // to shift the VLENB value.
6241 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6242 DAG.getConstant(Val / 8, DL, XLenVT));
6243 } else {
6244 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6245 DAG.getConstant(3, DL, XLenVT));
6246 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6247 DAG.getConstant(Val, DL, XLenVT));
6248 }
6249 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6250 }
6251 case ISD::FPOWI: {
6252 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6253 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6254 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6255 Op.getOperand(1).getValueType() == MVT::i32) {
6256 SDLoc DL(Op);
6257 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6258 SDValue Powi =
6259 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6260 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6261 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6262 }
6263 return SDValue();
6264 }
6265 case ISD::FMAXIMUM:
6266 case ISD::FMINIMUM:
6267 if (Op.getValueType() == MVT::nxv32f16 &&
6268 (Subtarget.hasVInstructionsF16Minimal() &&
6269 !Subtarget.hasVInstructionsF16()))
6270 return SplitVectorOp(Op, DAG);
6271 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6272 case ISD::FP_EXTEND: {
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 SDValue Op0 = Op.getOperand(0);
6276 EVT Op0VT = Op0.getValueType();
6277 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6278 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6279 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6280 SDValue FloatVal =
6281 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6283 }
6284
6285 if (!Op.getValueType().isVector())
6286 return Op;
6287 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6288 }
6289 case ISD::FP_ROUND: {
6290 SDLoc DL(Op);
6291 EVT VT = Op.getValueType();
6292 SDValue Op0 = Op.getOperand(0);
6293 EVT Op0VT = Op0.getValueType();
6294 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6295 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6296 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6297 Subtarget.hasStdExtDOrZdinx()) {
6298 SDValue FloatVal =
6299 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6300 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6301 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6302 }
6303
6304 if (!Op.getValueType().isVector())
6305 return Op;
6306 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6307 }
6308 case ISD::STRICT_FP_ROUND:
6309 case ISD::STRICT_FP_EXTEND:
6310 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6311 case ISD::SINT_TO_FP:
6312 case ISD::UINT_TO_FP:
6313 if (Op.getValueType().isVector() &&
6314 Op.getValueType().getScalarType() == MVT::f16 &&
6315 (Subtarget.hasVInstructionsF16Minimal() &&
6316 !Subtarget.hasVInstructionsF16())) {
6317 if (Op.getValueType() == MVT::nxv32f16)
6318 return SplitVectorOp(Op, DAG);
6319 // int -> f32
6320 SDLoc DL(Op);
6321 MVT NVT =
6322 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6323 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6324 // f32 -> f16
6325 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6326 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6327 }
6328 [[fallthrough]];
6329 case ISD::FP_TO_SINT:
6330 case ISD::FP_TO_UINT:
6331 if (SDValue Op1 = Op.getOperand(0);
6332 Op1.getValueType().isVector() &&
6333 Op1.getValueType().getScalarType() == MVT::f16 &&
6334 (Subtarget.hasVInstructionsF16Minimal() &&
6335 !Subtarget.hasVInstructionsF16())) {
6336 if (Op1.getValueType() == MVT::nxv32f16)
6337 return SplitVectorOp(Op, DAG);
6338 // f16 -> f32
6339 SDLoc DL(Op);
6340 MVT NVT = MVT::getVectorVT(MVT::f32,
6341 Op1.getValueType().getVectorElementCount());
6342 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6343 // f32 -> int
6344 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6345 }
6346 [[fallthrough]];
6347 case ISD::STRICT_FP_TO_SINT:
6348 case ISD::STRICT_FP_TO_UINT:
6349 case ISD::STRICT_SINT_TO_FP:
6350 case ISD::STRICT_UINT_TO_FP: {
6351 // RVV can only do fp<->int conversions to types half/double the size of
6352 // the source. We custom-lower any conversions that do two hops into
6353 // sequences.
6354 MVT VT = Op.getSimpleValueType();
6355 if (!VT.isVector())
6356 return Op;
6357 SDLoc DL(Op);
6358 bool IsStrict = Op->isStrictFPOpcode();
6359 SDValue Src = Op.getOperand(0 + IsStrict);
6360 MVT EltVT = VT.getVectorElementType();
6361 MVT SrcVT = Src.getSimpleValueType();
6362 MVT SrcEltVT = SrcVT.getVectorElementType();
6363 unsigned EltSize = EltVT.getSizeInBits();
6364 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6365 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6366 "Unexpected vector element types");
6367
6368 bool IsInt2FP = SrcEltVT.isInteger();
6369 // Widening conversions
6370 if (EltSize > (2 * SrcEltSize)) {
6371 if (IsInt2FP) {
6372 // Do a regular integer sign/zero extension then convert to float.
6373 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6374 VT.getVectorElementCount());
6375 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6376 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6377 ? ISD::ZERO_EXTEND
6378 : ISD::SIGN_EXTEND;
6379 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6380 if (IsStrict)
6381 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6382 Op.getOperand(0), Ext);
6383 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6384 }
6385 // FP2Int
6386 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6387 // Do one doubling fp_extend then complete the operation by converting
6388 // to int.
6389 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6390 if (IsStrict) {
6391 auto [FExt, Chain] =
6392 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6393 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6394 }
6395 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6396 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6397 }
6398
6399 // Narrowing conversions
6400 if (SrcEltSize > (2 * EltSize)) {
6401 if (IsInt2FP) {
6402 // One narrowing int_to_fp, then an fp_round.
6403 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6404 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6405 if (IsStrict) {
6406 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6407 DAG.getVTList(InterimFVT, MVT::Other),
6408 Op.getOperand(0), Src);
6409 SDValue Chain = Int2FP.getValue(1);
6410 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6411 }
6412 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6413 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6414 }
6415 // FP2Int
6416 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6417 // representable by the integer, the result is poison.
6418 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6419 VT.getVectorElementCount());
6420 if (IsStrict) {
6421 SDValue FP2Int =
6422 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6423 Op.getOperand(0), Src);
6424 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6425 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6426 }
6427 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6428 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6429 }
6430
6431 // Scalable vectors can exit here. Patterns will handle equally-sized
6432 // conversions halving/doubling ones.
6433 if (!VT.isFixedLengthVector())
6434 return Op;
6435
6436 // For fixed-length vectors we lower to a custom "VL" node.
6437 unsigned RVVOpc = 0;
6438 switch (Op.getOpcode()) {
6439 default:
6440 llvm_unreachable("Impossible opcode");
6441 case ISD::FP_TO_SINT:
6442 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6443 break;
6444 case ISD::FP_TO_UINT:
6445 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6446 break;
6447 case ISD::SINT_TO_FP:
6448 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6449 break;
6450 case ISD::UINT_TO_FP:
6451 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6452 break;
6453 case ISD::STRICT_FP_TO_SINT:
6454 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6455 break;
6456 case ISD::STRICT_FP_TO_UINT:
6457 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6458 break;
6459 case ISD::STRICT_SINT_TO_FP:
6460 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6461 break;
6462 case ISD::STRICT_UINT_TO_FP:
6463 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6464 break;
6465 }
6466
6467 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6468 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6469 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6470 "Expected same element count");
6471
6472 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6473
6474 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6475 if (IsStrict) {
6476 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6477 Op.getOperand(0), Src, Mask, VL);
6478 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6479 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6480 }
6481 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6482 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6483 }
6484 case ISD::FP_TO_SINT_SAT:
6485 case ISD::FP_TO_UINT_SAT:
6486 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6487 case ISD::FP_TO_BF16: {
6488 // Custom lower to ensure the libcall return is passed in an FPR on hard
6489 // float ABIs.
6490 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6491 SDLoc DL(Op);
6492 MakeLibCallOptions CallOptions;
6493 RTLIB::Libcall LC =
6494 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6495 SDValue Res =
6496 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6497 if (Subtarget.is64Bit() && !RV64LegalI32)
6498 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6499 return DAG.getBitcast(MVT::i32, Res);
6500 }
6501 case ISD::BF16_TO_FP: {
6502 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6503 MVT VT = Op.getSimpleValueType();
6504 SDLoc DL(Op);
6505 Op = DAG.getNode(
6506 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6507 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6508 SDValue Res = Subtarget.is64Bit()
6509 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6510 : DAG.getBitcast(MVT::f32, Op);
6511 // fp_extend if the target VT is bigger than f32.
6512 if (VT != MVT::f32)
6513 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6514 return Res;
6515 }
6516 case ISD::FP_TO_FP16: {
6517 // Custom lower to ensure the libcall return is passed in an FPR on hard
6518 // float ABIs.
6519 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6520 SDLoc DL(Op);
6521 MakeLibCallOptions CallOptions;
6522 RTLIB::Libcall LC =
6523 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6524 SDValue Res =
6525 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6526 if (Subtarget.is64Bit() && !RV64LegalI32)
6527 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6528 return DAG.getBitcast(MVT::i32, Res);
6529 }
6530 case ISD::FP16_TO_FP: {
6531 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6532 // float ABIs.
6533 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6534 SDLoc DL(Op);
6535 MakeLibCallOptions CallOptions;
6536 SDValue Arg = Subtarget.is64Bit()
6537 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6538 Op.getOperand(0))
6539 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6540 SDValue Res =
6541 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6542 .first;
6543 return Res;
6544 }
6545 case ISD::FTRUNC:
6546 case ISD::FCEIL:
6547 case ISD::FFLOOR:
6548 case ISD::FNEARBYINT:
6549 case ISD::FRINT:
6550 case ISD::FROUND:
6551 case ISD::FROUNDEVEN:
6552 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6553 case ISD::LRINT:
6554 case ISD::LLRINT:
6555 return lowerVectorXRINT(Op, DAG, Subtarget);
6556 case ISD::VECREDUCE_ADD:
6557 case ISD::VECREDUCE_UMAX:
6558 case ISD::VECREDUCE_SMAX:
6559 case ISD::VECREDUCE_UMIN:
6560 case ISD::VECREDUCE_SMIN:
6561 return lowerVECREDUCE(Op, DAG);
6562 case ISD::VECREDUCE_AND:
6563 case ISD::VECREDUCE_OR:
6564 case ISD::VECREDUCE_XOR:
6565 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6566 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6567 return lowerVECREDUCE(Op, DAG);
6568 case ISD::VECREDUCE_FADD:
6569 case ISD::VECREDUCE_SEQ_FADD:
6570 case ISD::VECREDUCE_FMIN:
6571 case ISD::VECREDUCE_FMAX:
6572 case ISD::VECREDUCE_FMAXIMUM:
6573 case ISD::VECREDUCE_FMINIMUM:
6574 return lowerFPVECREDUCE(Op, DAG);
6575 case ISD::VP_REDUCE_ADD:
6576 case ISD::VP_REDUCE_UMAX:
6577 case ISD::VP_REDUCE_SMAX:
6578 case ISD::VP_REDUCE_UMIN:
6579 case ISD::VP_REDUCE_SMIN:
6580 case ISD::VP_REDUCE_FADD:
6581 case ISD::VP_REDUCE_SEQ_FADD:
6582 case ISD::VP_REDUCE_FMIN:
6583 case ISD::VP_REDUCE_FMAX:
6584 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6585 (Subtarget.hasVInstructionsF16Minimal() &&
6586 !Subtarget.hasVInstructionsF16()))
6587 return SplitVectorReductionOp(Op, DAG);
6588 return lowerVPREDUCE(Op, DAG);
6589 case ISD::VP_REDUCE_AND:
6590 case ISD::VP_REDUCE_OR:
6591 case ISD::VP_REDUCE_XOR:
6592 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6593 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6594 return lowerVPREDUCE(Op, DAG);
6595 case ISD::UNDEF: {
6596 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6597 return convertFromScalableVector(Op.getSimpleValueType(),
6598 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6599 }
6600 case ISD::INSERT_SUBVECTOR:
6601 return lowerINSERT_SUBVECTOR(Op, DAG);
6602 case ISD::EXTRACT_SUBVECTOR:
6603 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6604 case ISD::VECTOR_DEINTERLEAVE:
6605 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6606 case ISD::VECTOR_INTERLEAVE:
6607 return lowerVECTOR_INTERLEAVE(Op, DAG);
6608 case ISD::STEP_VECTOR:
6609 return lowerSTEP_VECTOR(Op, DAG);
6610 case ISD::VECTOR_REVERSE:
6611 return lowerVECTOR_REVERSE(Op, DAG);
6612 case ISD::VECTOR_SPLICE:
6613 return lowerVECTOR_SPLICE(Op, DAG);
6614 case ISD::BUILD_VECTOR:
6615 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6616 case ISD::SPLAT_VECTOR:
6617 if (Op.getValueType().getScalarType() == MVT::f16 &&
6618 (Subtarget.hasVInstructionsF16Minimal() &&
6619 !Subtarget.hasVInstructionsF16())) {
6620 if (Op.getValueType() == MVT::nxv32f16)
6621 return SplitVectorOp(Op, DAG);
6622 SDLoc DL(Op);
6623 SDValue NewScalar =
6624 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6625 SDValue NewSplat = DAG.getNode(
6626 ISD::SPLAT_VECTOR, DL,
6627 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6628 NewScalar);
6629 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6630 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6631 }
6632 if (Op.getValueType().getVectorElementType() == MVT::i1)
6633 return lowerVectorMaskSplat(Op, DAG);
6634 return SDValue();
6635 case ISD::VECTOR_SHUFFLE:
6636 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6637 case ISD::CONCAT_VECTORS: {
6638 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6639 // better than going through the stack, as the default expansion does.
6640 SDLoc DL(Op);
6641 MVT VT = Op.getSimpleValueType();
6642 MVT ContainerVT = VT;
6643 if (VT.isFixedLengthVector())
6644 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6645
6646 // Recursively split concat_vectors with more than 2 operands:
6647 //
6648 // concat_vector op1, op2, op3, op4
6649 // ->
6650 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6651 //
6652 // This reduces the length of the chain of vslideups and allows us to
6653 // perform the vslideups at a smaller LMUL, limited to MF2.
6654 if (Op.getNumOperands() > 2 &&
6655 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6656 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6657 assert(isPowerOf2_32(Op.getNumOperands()));
6658 size_t HalfNumOps = Op.getNumOperands() / 2;
6659 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6660 Op->ops().take_front(HalfNumOps));
6661 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6662 Op->ops().drop_front(HalfNumOps));
6663 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6664 }
6665
6666 unsigned NumOpElts =
6667 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6668 SDValue Vec = DAG.getUNDEF(VT);
6669 for (const auto &OpIdx : enumerate(Op->ops())) {
6670 SDValue SubVec = OpIdx.value();
6671 // Don't insert undef subvectors.
6672 if (SubVec.isUndef())
6673 continue;
6674 Vec =
6675 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6676 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6677 }
6678 return Vec;
6679 }
6680 case ISD::LOAD:
6681 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6682 return V;
6683 if (Op.getValueType().isFixedLengthVector())
6684 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6685 return Op;
6686 case ISD::STORE:
6687 if (auto V = expandUnalignedRVVStore(Op, DAG))
6688 return V;
6689 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6690 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6691 return Op;
6692 case ISD::MLOAD:
6693 case ISD::VP_LOAD:
6694 return lowerMaskedLoad(Op, DAG);
6695 case ISD::MSTORE:
6696 case ISD::VP_STORE:
6697 return lowerMaskedStore(Op, DAG);
6698 case ISD::SELECT_CC: {
6699 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6700 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6701 // into separate SETCC+SELECT just like LegalizeDAG.
6702 SDValue Tmp1 = Op.getOperand(0);
6703 SDValue Tmp2 = Op.getOperand(1);
6704 SDValue True = Op.getOperand(2);
6705 SDValue False = Op.getOperand(3);
6706 EVT VT = Op.getValueType();
6707 SDValue CC = Op.getOperand(4);
6708 EVT CmpVT = Tmp1.getValueType();
6709 EVT CCVT =
6710 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6711 SDLoc DL(Op);
6712 SDValue Cond =
6713 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6714 return DAG.getSelect(DL, VT, Cond, True, False);
6715 }
6716 case ISD::SETCC: {
6717 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6718 if (OpVT.isScalarInteger()) {
6719 MVT VT = Op.getSimpleValueType();
6720 SDValue LHS = Op.getOperand(0);
6721 SDValue RHS = Op.getOperand(1);
6722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6723 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6724 "Unexpected CondCode");
6725
6726 SDLoc DL(Op);
6727
6728 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6729 // convert this to the equivalent of (set(u)ge X, C+1) by using
6730 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6731 // in a register.
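// Illustrative example (editor's addition): (setugt X, 7) is equivalent to
// (setuge X, 8) and becomes (xori (sltiu X, 8), 1), i.e. two immediate-form
// instructions instead of materializing 7 for a register-register sltu.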
6732 if (isa<ConstantSDNode>(RHS)) {
6733 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6734 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6735 // If this is an unsigned compare and the constant is -1, incrementing
6736 // the constant would change behavior. The result should be false.
6737 if (CCVal == ISD::SETUGT && Imm == -1)
6738 return DAG.getConstant(0, DL, VT);
6739 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6740 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6741 SDValue SetCC = DAG.getSetCC(
6742 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6743 return DAG.getLogicalNOT(DL, SetCC, VT);
6744 }
6745 }
6746
6747 // Not a constant we could handle; swap the operands and condition code to
6748 // SETLT/SETULT.
6749 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6750 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6751 }
6752
6753 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6754 (Subtarget.hasVInstructionsF16Minimal() &&
6755 !Subtarget.hasVInstructionsF16()))
6756 return SplitVectorOp(Op, DAG);
6757
6758 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6759 }
6760 case ISD::ADD:
6761 case ISD::SUB:
6762 case ISD::MUL:
6763 case ISD::MULHS:
6764 case ISD::MULHU:
6765 case ISD::AND:
6766 case ISD::OR:
6767 case ISD::XOR:
6768 case ISD::SDIV:
6769 case ISD::SREM:
6770 case ISD::UDIV:
6771 case ISD::UREM:
6772 case ISD::BSWAP:
6773 case ISD::CTPOP:
6774 return lowerToScalableOp(Op, DAG);
6775 case ISD::SHL:
6776 case ISD::SRA:
6777 case ISD::SRL:
6778 if (Op.getSimpleValueType().isFixedLengthVector())
6779 return lowerToScalableOp(Op, DAG);
6780 // This can be called for an i32 shift amount that needs to be promoted.
6781 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6782 "Unexpected custom legalisation");
6783 return SDValue();
6784 case ISD::FADD:
6785 case ISD::FSUB:
6786 case ISD::FMUL:
6787 case ISD::FDIV:
6788 case ISD::FNEG:
6789 case ISD::FABS:
6790 case ISD::FSQRT:
6791 case ISD::FMA:
6792 case ISD::FMINNUM:
6793 case ISD::FMAXNUM:
6794 if (Op.getValueType() == MVT::nxv32f16 &&
6795 (Subtarget.hasVInstructionsF16Minimal() &&
6796 !Subtarget.hasVInstructionsF16()))
6797 return SplitVectorOp(Op, DAG);
6798 [[fallthrough]];
6799 case ISD::AVGFLOORU:
6800 case ISD::AVGCEILU:
6801 case ISD::SMIN:
6802 case ISD::SMAX:
6803 case ISD::UMIN:
6804 case ISD::UMAX:
6805 return lowerToScalableOp(Op, DAG);
6806 case ISD::UADDSAT:
6807 case ISD::USUBSAT:
6808 if (!Op.getValueType().isVector())
6809 return lowerUADDSAT_USUBSAT(Op, DAG);
6810 return lowerToScalableOp(Op, DAG);
6811 case ISD::SADDSAT:
6812 case ISD::SSUBSAT:
6813 if (!Op.getValueType().isVector())
6814 return lowerSADDSAT_SSUBSAT(Op, DAG);
6815 return lowerToScalableOp(Op, DAG);
6816 case ISD::ABDS:
6817 case ISD::ABDU: {
6818 SDLoc dl(Op);
6819 EVT VT = Op->getValueType(0);
6820 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6821 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6822 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6823
6824 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6825 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
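// Worked example (illustrative): abds(3, -5) = smax(3, -5) - smin(3, -5)
// = 3 - (-5) = 8, the signed absolute difference.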
6826 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6827 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6828 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6829 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6830 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6831 }
6832 case ISD::ABS:
6833 case ISD::VP_ABS:
6834 return lowerABS(Op, DAG);
6835 case ISD::CTLZ:
6836 case ISD::CTLZ_ZERO_UNDEF:
6837 case ISD::CTTZ:
6838 case ISD::CTTZ_ZERO_UNDEF:
6839 if (Subtarget.hasStdExtZvbb())
6840 return lowerToScalableOp(Op, DAG);
6841 assert(Op.getOpcode() != ISD::CTTZ);
6842 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6843 case ISD::VSELECT:
6844 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6845 case ISD::FCOPYSIGN:
6846 if (Op.getValueType() == MVT::nxv32f16 &&
6847 (Subtarget.hasVInstructionsF16Minimal() &&
6848 !Subtarget.hasVInstructionsF16()))
6849 return SplitVectorOp(Op, DAG);
6850 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6851 case ISD::STRICT_FADD:
6852 case ISD::STRICT_FSUB:
6853 case ISD::STRICT_FMUL:
6854 case ISD::STRICT_FDIV:
6855 case ISD::STRICT_FSQRT:
6856 case ISD::STRICT_FMA:
6857 if (Op.getValueType() == MVT::nxv32f16 &&
6858 (Subtarget.hasVInstructionsF16Minimal() &&
6859 !Subtarget.hasVInstructionsF16()))
6860 return SplitStrictFPVectorOp(Op, DAG);
6861 return lowerToScalableOp(Op, DAG);
6862 case ISD::STRICT_FSETCC:
6863 case ISD::STRICT_FSETCCS:
6864 return lowerVectorStrictFSetcc(Op, DAG);
6865 case ISD::STRICT_FCEIL:
6866 case ISD::STRICT_FRINT:
6867 case ISD::STRICT_FFLOOR:
6868 case ISD::STRICT_FTRUNC:
6869 case ISD::STRICT_FNEARBYINT:
6870 case ISD::STRICT_FROUND:
6871 case ISD::STRICT_FROUNDEVEN:
6872 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6873 case ISD::MGATHER:
6874 case ISD::VP_GATHER:
6875 return lowerMaskedGather(Op, DAG);
6876 case ISD::MSCATTER:
6877 case ISD::VP_SCATTER:
6878 return lowerMaskedScatter(Op, DAG);
6879 case ISD::GET_ROUNDING:
6880 return lowerGET_ROUNDING(Op, DAG);
6881 case ISD::SET_ROUNDING:
6882 return lowerSET_ROUNDING(Op, DAG);
6883 case ISD::EH_DWARF_CFA:
6884 return lowerEH_DWARF_CFA(Op, DAG);
6885 case ISD::VP_SELECT:
6886 case ISD::VP_MERGE:
6887 case ISD::VP_ADD:
6888 case ISD::VP_SUB:
6889 case ISD::VP_MUL:
6890 case ISD::VP_SDIV:
6891 case ISD::VP_UDIV:
6892 case ISD::VP_SREM:
6893 case ISD::VP_UREM:
6894 case ISD::VP_UADDSAT:
6895 case ISD::VP_USUBSAT:
6896 case ISD::VP_SADDSAT:
6897 case ISD::VP_SSUBSAT:
6898 case ISD::VP_LRINT:
6899 case ISD::VP_LLRINT:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_AND:
6902 case ISD::VP_OR:
6903 case ISD::VP_XOR:
6904 return lowerLogicVPOp(Op, DAG);
6905 case ISD::VP_FADD:
6906 case ISD::VP_FSUB:
6907 case ISD::VP_FMUL:
6908 case ISD::VP_FDIV:
6909 case ISD::VP_FNEG:
6910 case ISD::VP_FABS:
6911 case ISD::VP_SQRT:
6912 case ISD::VP_FMA:
6913 case ISD::VP_FMINNUM:
6914 case ISD::VP_FMAXNUM:
6915 case ISD::VP_FCOPYSIGN:
6916 if (Op.getValueType() == MVT::nxv32f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16()))
6919 return SplitVPOp(Op, DAG);
6920 [[fallthrough]];
6921 case ISD::VP_ASHR:
6922 case ISD::VP_LSHR:
6923 case ISD::VP_SHL:
6924 return lowerVPOp(Op, DAG);
6925 case ISD::VP_IS_FPCLASS:
6926 return LowerIS_FPCLASS(Op, DAG);
6927 case ISD::VP_SIGN_EXTEND:
6928 case ISD::VP_ZERO_EXTEND:
6929 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6930 return lowerVPExtMaskOp(Op, DAG);
6931 return lowerVPOp(Op, DAG);
6932 case ISD::VP_TRUNCATE:
6933 return lowerVectorTruncLike(Op, DAG);
6934 case ISD::VP_FP_EXTEND:
6935 case ISD::VP_FP_ROUND:
6936 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6937 case ISD::VP_SINT_TO_FP:
6938 case ISD::VP_UINT_TO_FP:
6939 if (Op.getValueType().isVector() &&
6940 Op.getValueType().getScalarType() == MVT::f16 &&
6941 (Subtarget.hasVInstructionsF16Minimal() &&
6942 !Subtarget.hasVInstructionsF16())) {
6943 if (Op.getValueType() == MVT::nxv32f16)
6944 return SplitVPOp(Op, DAG);
6945 // int -> f32
6946 SDLoc DL(Op);
6947 MVT NVT =
6948 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6949 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6950 // f32 -> f16
6951 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6952 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6953 }
6954 [[fallthrough]];
6955 case ISD::VP_FP_TO_SINT:
6956 case ISD::VP_FP_TO_UINT:
6957 if (SDValue Op1 = Op.getOperand(0);
6958 Op1.getValueType().isVector() &&
6959 Op1.getValueType().getScalarType() == MVT::f16 &&
6960 (Subtarget.hasVInstructionsF16Minimal() &&
6961 !Subtarget.hasVInstructionsF16())) {
6962 if (Op1.getValueType() == MVT::nxv32f16)
6963 return SplitVPOp(Op, DAG);
6964 // f16 -> f32
6965 SDLoc DL(Op);
6966 MVT NVT = MVT::getVectorVT(MVT::f32,
6967 Op1.getValueType().getVectorElementCount());
6968 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6969 // f32 -> int
6970 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6971 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6972 }
6973 return lowerVPFPIntConvOp(Op, DAG);
6974 case ISD::VP_SETCC:
6975 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6976 (Subtarget.hasVInstructionsF16Minimal() &&
6977 !Subtarget.hasVInstructionsF16()))
6978 return SplitVPOp(Op, DAG);
6979 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6980 return lowerVPSetCCMaskOp(Op, DAG);
6981 [[fallthrough]];
6982 case ISD::VP_SMIN:
6983 case ISD::VP_SMAX:
6984 case ISD::VP_UMIN:
6985 case ISD::VP_UMAX:
6986 case ISD::VP_BITREVERSE:
6987 case ISD::VP_BSWAP:
6988 return lowerVPOp(Op, DAG);
6989 case ISD::VP_CTLZ:
6990 case ISD::VP_CTLZ_ZERO_UNDEF:
6991 if (Subtarget.hasStdExtZvbb())
6992 return lowerVPOp(Op, DAG);
6993 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6994 case ISD::VP_CTTZ:
6995 case ISD::VP_CTTZ_ZERO_UNDEF:
6996 if (Subtarget.hasStdExtZvbb())
6997 return lowerVPOp(Op, DAG);
6998 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 case ISD::VP_CTPOP:
7000 return lowerVPOp(Op, DAG);
7001 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7002 return lowerVPStridedLoad(Op, DAG);
7003 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7004 return lowerVPStridedStore(Op, DAG);
7005 case ISD::VP_FCEIL:
7006 case ISD::VP_FFLOOR:
7007 case ISD::VP_FRINT:
7008 case ISD::VP_FNEARBYINT:
7009 case ISD::VP_FROUND:
7010 case ISD::VP_FROUNDEVEN:
7011 case ISD::VP_FROUNDTOZERO:
7012 if (Op.getValueType() == MVT::nxv32f16 &&
7013 (Subtarget.hasVInstructionsF16Minimal() &&
7014 !Subtarget.hasVInstructionsF16()))
7015 return SplitVPOp(Op, DAG);
7016 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7017 case ISD::VP_FMAXIMUM:
7018 case ISD::VP_FMINIMUM:
7019 if (Op.getValueType() == MVT::nxv32f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16()))
7022 return SplitVPOp(Op, DAG);
7023 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7024 case ISD::EXPERIMENTAL_VP_SPLICE:
7025 return lowerVPSpliceExperimental(Op, DAG);
7026 case ISD::EXPERIMENTAL_VP_REVERSE:
7027 return lowerVPReverseExperimental(Op, DAG);
7028 }
7029}
7030
7031 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7032 SelectionDAG &DAG, unsigned Flags) {
7033 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7034}
7035
7036 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7037 SelectionDAG &DAG, unsigned Flags) {
7038 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7039 Flags);
7040}
7041
7042 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7043 SelectionDAG &DAG, unsigned Flags) {
7044 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7045 N->getOffset(), Flags);
7046}
7047
7048 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7049 SelectionDAG &DAG, unsigned Flags) {
7050 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7051}
7052
7053template <class NodeTy>
7054SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7055 bool IsLocal, bool IsExternWeak) const {
7056 SDLoc DL(N);
7057 EVT Ty = getPointerTy(DAG.getDataLayout());
7058
7059 // When HWASAN is used and tagging of global variables is enabled
7060 // they should be accessed via the GOT, since the tagged address of a global
7061 // is incompatible with existing code models. This also applies to non-pic
7062 // mode.
7063 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7064 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7065 if (IsLocal && !Subtarget.allowTaggedGlobals())
7066 // Use PC-relative addressing to access the symbol. This generates the
7067 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7068 // %pcrel_lo(auipc)).
7069 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7070
7071 // Use PC-relative addressing to access the GOT for this symbol, then load
7072 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7073 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7074 SDValue Load =
7075 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7076 MachineFunction &MF = DAG.getMachineFunction();
7077 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7078 MachinePointerInfo::getGOT(MF),
7079 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7080 MachineMemOperand::MOInvariant,
7081 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7082 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7083 return Load;
7084 }
7085
7086 switch (getTargetMachine().getCodeModel()) {
7087 default:
7088 report_fatal_error("Unsupported code model for lowering");
7089 case CodeModel::Small: {
7090 // Generate a sequence for accessing addresses within the first 2 GiB of
7091 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
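// For example (illustrative, register choice arbitrary):
// lui a0, %hi(sym)
// addi a0, a0, %lo(sym)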
7092 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7093 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7094 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7095 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7096 }
7097 case CodeModel::Medium: {
7098 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7099 if (IsExternWeak) {
7100 // An extern weak symbol may be undefined, i.e. have value 0, which may
7101 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7102 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7103 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7104 SDValue Load =
7105 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7108 MachinePointerInfo::getGOT(MF),
7109 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7110 MachineMemOperand::MOInvariant,
7111 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7112 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7113 return Load;
7114 }
7115
7116 // Generate a sequence for accessing addresses within any 2GiB range within
7117 // the address space. This generates the pattern (PseudoLLA sym), which
7118 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7119 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7120 }
7121 }
7122}
7123
7124SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7125 SelectionDAG &DAG) const {
7126 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7127 assert(N->getOffset() == 0 && "unexpected offset in global node");
7128 const GlobalValue *GV = N->getGlobal();
7129 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7130}
7131
7132SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7133 SelectionDAG &DAG) const {
7134 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7135
7136 return getAddr(N, DAG);
7137}
7138
7139SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7140 SelectionDAG &DAG) const {
7141 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7142
7143 return getAddr(N, DAG);
7144}
7145
7146SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7147 SelectionDAG &DAG) const {
7148 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7149
7150 return getAddr(N, DAG);
7151}
7152
7153SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7154 SelectionDAG &DAG,
7155 bool UseGOT) const {
7156 SDLoc DL(N);
7157 EVT Ty = getPointerTy(DAG.getDataLayout());
7158 const GlobalValue *GV = N->getGlobal();
7159 MVT XLenVT = Subtarget.getXLenVT();
7160
7161 if (UseGOT) {
7162 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7163 // load the address from the GOT and add the thread pointer. This generates
7164 // the pattern (PseudoLA_TLS_IE sym), which expands to
7165 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7166 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7167 SDValue Load =
7168 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7169 MachineFunction &MF = DAG.getMachineFunction();
7170 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7171 MachinePointerInfo::getGOT(MF),
7172 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7173 MachineMemOperand::MOInvariant,
7174 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7175 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7176
7177 // Add the thread pointer.
7178 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7179 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7180 }
7181
7182 // Generate a sequence for accessing the address relative to the thread
7183 // pointer, with the appropriate adjustment for the thread pointer offset.
7184 // This generates the pattern
7185 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
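// which corresponds to the usual local-exec sequence (illustrative,
// register choice arbitrary):
// lui a0, %tprel_hi(sym)
// add a0, a0, tp, %tprel_add(sym)
// addi a0, a0, %tprel_lo(sym)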
7186 SDValue AddrHi =
7187 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7188 SDValue AddrAdd =
7189 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7190 SDValue AddrLo =
7191 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7192
7193 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7194 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7195 SDValue MNAdd =
7196 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7197 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7198}
7199
7200SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG) const {
7202 SDLoc DL(N);
7203 EVT Ty = getPointerTy(DAG.getDataLayout());
7204 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7205 const GlobalValue *GV = N->getGlobal();
7206
7207 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7208 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7209 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7210 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7211 SDValue Load =
7212 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7213
7214 // Prepare argument list to generate call.
7215 ArgListTy Args;
7216 ArgListEntry Entry;
7217 Entry.Node = Load;
7218 Entry.Ty = CallTy;
7219 Args.push_back(Entry);
7220
7221 // Setup call to __tls_get_addr.
7222 TargetLowering::CallLoweringInfo CLI(DAG);
7223 CLI.setDebugLoc(DL)
7224 .setChain(DAG.getEntryNode())
7225 .setLibCallee(CallingConv::C, CallTy,
7226 DAG.getExternalSymbol("__tls_get_addr", Ty),
7227 std::move(Args));
7228
7229 return LowerCallTo(CLI).first;
7230}
7231
7232SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7233 SelectionDAG &DAG) const {
7234 SDLoc DL(N);
7235 EVT Ty = getPointerTy(DAG.getDataLayout());
7236 const GlobalValue *GV = N->getGlobal();
7237
7238 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7239 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7240 //
7241 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7242 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7243 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7244 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7245 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7247}
7248
7249SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7252 assert(N->getOffset() == 0 && "unexpected offset in global node");
7253
7254 if (DAG.getTarget().useEmulatedTLS())
7255 return LowerToTLSEmulatedModel(N, DAG);
7256
7257 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7258
7259 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7260 CallingConv::GHC)
7261 report_fatal_error("In GHC calling convention TLS is not supported");
7262
7263 SDValue Addr;
7264 switch (Model) {
7265 case TLSModel::LocalExec:
7266 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7267 break;
7268 case TLSModel::InitialExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7270 break;
7271 case TLSModel::LocalDynamic:
7272 case TLSModel::GeneralDynamic:
7273 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7274 : getDynamicTLSAddr(N, DAG);
7275 break;
7276 }
7277
7278 return Addr;
7279}
7280
7281// Return true if Val is equal to (setcc LHS, RHS, CC).
7282// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7283// Otherwise, return std::nullopt.
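// Illustrative examples (editor's addition), with LHS=a, RHS=b, CC=setlt:
// Val=(setcc a, b, setlt) -> true
// Val=(setcc a, b, setge) -> false (inverse predicate)
// Val=(setcc b, a, setgt) -> true (operands and predicate both swapped)
// Val=(setcc a, c, setlt) -> std::nullopt (operands don't match)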
7284static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7285 ISD::CondCode CC, SDValue Val) {
7286 assert(Val->getOpcode() == ISD::SETCC);
7287 SDValue LHS2 = Val.getOperand(0);
7288 SDValue RHS2 = Val.getOperand(1);
7289 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7290
7291 if (LHS == LHS2 && RHS == RHS2) {
7292 if (CC == CC2)
7293 return true;
7294 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7295 return false;
7296 } else if (LHS == RHS2 && RHS == LHS2) {
7297 CC2 = ISD::getSetCCSwappedOperands(CC2);
7298 if (CC == CC2)
7299 return true;
7300 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7301 return false;
7302 }
7303
7304 return std::nullopt;
7305}
7306
7307 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7308 const RISCVSubtarget &Subtarget) {
7309 SDValue CondV = N->getOperand(0);
7310 SDValue TrueV = N->getOperand(1);
7311 SDValue FalseV = N->getOperand(2);
7312 MVT VT = N->getSimpleValueType(0);
7313 SDLoc DL(N);
7314
7315 if (!Subtarget.hasConditionalMoveFusion()) {
7316 // (select c, -1, y) -> -c | y
7317 if (isAllOnesConstant(TrueV)) {
7318 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7319 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7320 }
7321 // (select c, y, -1) -> (c-1) | y
7322 if (isAllOnesConstant(FalseV)) {
7323 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7324 DAG.getAllOnesConstant(DL, VT));
7325 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7326 }
7327
7328 // (select c, 0, y) -> (c-1) & y
7329 if (isNullConstant(TrueV)) {
7330 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7331 DAG.getAllOnesConstant(DL, VT));
7332 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7333 }
7334 // (select c, y, 0) -> -c & y
7335 if (isNullConstant(FalseV)) {
7336 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7337 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7338 }
7339 }
7340
7341 // select c, ~x, x --> xor -c, x
7342 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7343 const APInt &TrueVal = TrueV->getAsAPIntVal();
7344 const APInt &FalseVal = FalseV->getAsAPIntVal();
7345 if (~TrueVal == FalseVal) {
7346 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7347 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7348 }
7349 }
7350
7351 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7352 // when both truev and falsev are also setcc.
7353 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7354 FalseV.getOpcode() == ISD::SETCC) {
7355 SDValue LHS = CondV.getOperand(0);
7356 SDValue RHS = CondV.getOperand(1);
7357 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7358
7359 // (select x, x, y) -> x | y
7360 // (select !x, x, y) -> x & y
7361 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7362 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7363 DAG.getFreeze(FalseV));
7364 }
7365 // (select x, y, x) -> x & y
7366 // (select !x, y, x) -> x | y
7367 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7368 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7369 DAG.getFreeze(TrueV), FalseV);
7370 }
7371 }
7372
7373 return SDValue();
7374}
7375
7376// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7377// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7378// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7379// being `0` or `-1`. In such cases we can replace `select` with `and`.
7380// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7381// than `c0`?
7382static SDValue
7383 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7384 const RISCVSubtarget &Subtarget) {
7385 if (Subtarget.hasShortForwardBranchOpt())
7386 return SDValue();
7387
7388 unsigned SelOpNo = 0;
7389 SDValue Sel = BO->getOperand(0);
7390 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7391 SelOpNo = 1;
7392 Sel = BO->getOperand(1);
7393 }
7394
7395 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7396 return SDValue();
7397
7398 unsigned ConstSelOpNo = 1;
7399 unsigned OtherSelOpNo = 2;
7400 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7401 ConstSelOpNo = 2;
7402 OtherSelOpNo = 1;
7403 }
7404 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7405 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7406 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7407 return SDValue();
7408
7409 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7410 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7411 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7412 return SDValue();
7413
7414 SDLoc DL(Sel);
7415 EVT VT = BO->getValueType(0);
7416
7417 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7418 if (SelOpNo == 1)
7419 std::swap(NewConstOps[0], NewConstOps[1]);
7420
7421 SDValue NewConstOp =
7422 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7423 if (!NewConstOp)
7424 return SDValue();
7425
7426 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7427 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7428 return SDValue();
7429
7430 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7431 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7432 if (SelOpNo == 1)
7433 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7434 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7435
7436 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7437 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7438 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7439}
7440
7441SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7442 SDValue CondV = Op.getOperand(0);
7443 SDValue TrueV = Op.getOperand(1);
7444 SDValue FalseV = Op.getOperand(2);
7445 SDLoc DL(Op);
7446 MVT VT = Op.getSimpleValueType();
7447 MVT XLenVT = Subtarget.getXLenVT();
7448
7449 // Lower vector SELECTs to VSELECTs by splatting the condition.
7450 if (VT.isVector()) {
7451 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7452 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7453 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7454 }
7455
7456 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7457 // nodes to implement the SELECT. Performing the lowering here allows for
7458 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7459 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7460 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7461 VT.isScalarInteger()) {
7462 // (select c, t, 0) -> (czero_eqz t, c)
7463 if (isNullConstant(FalseV))
7464 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7465 // (select c, 0, f) -> (czero_nez f, c)
7466 if (isNullConstant(TrueV))
7467 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7468
7469 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7470 if (TrueV.getOpcode() == ISD::AND &&
7471 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7472 return DAG.getNode(
7473 ISD::OR, DL, VT, TrueV,
7474 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7475 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7476 if (FalseV.getOpcode() == ISD::AND &&
7477 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7478 return DAG.getNode(
7479 ISD::OR, DL, VT, FalseV,
7480 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7481
7482 // Try some other optimizations before falling back to generic lowering.
7483 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7484 return V;
7485
7486 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7487 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
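// For instance (illustrative, assuming the czero.nez form is chosen):
// (select c, 1, 4) -> (add (czero_nez 3, c), 1); the 3 is zeroed when
// c != 0, giving 1, and kept when c == 0, giving 4.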
7488 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7489 const APInt &TrueVal = TrueV->getAsAPIntVal();
7490 const APInt &FalseVal = FalseV->getAsAPIntVal();
7491 const int TrueValCost = RISCVMatInt::getIntMatCost(
7492 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7493 const int FalseValCost = RISCVMatInt::getIntMatCost(
7494 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7495 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7496 SDValue LHSVal = DAG.getConstant(
7497 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7498 SDValue RHSVal =
7499 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7500 SDValue CMOV =
7501 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7502 DL, VT, LHSVal, CondV);
7503 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7504 }
7505
7506 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7507 // Unless we have the short forward branch optimization.
7508 if (!Subtarget.hasConditionalMoveFusion())
7509 return DAG.getNode(
7510 ISD::OR, DL, VT,
7511 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7512 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7513 }
7514
7515 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7516 return V;
7517
7518 if (Op.hasOneUse()) {
7519 unsigned UseOpc = Op->use_begin()->getOpcode();
7520 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7521 SDNode *BinOp = *Op->use_begin();
7522 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7523 DAG, Subtarget)) {
7524 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7525 return lowerSELECT(NewSel, DAG);
7526 }
7527 }
7528 }
7529
7530 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7531 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
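// (The condition is known to be 0 or 1 here, so the integer-to-fp
// conversion yields exactly 0.0 or 1.0 without an FP select or branch.)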
7532 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7533 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7534 if (FPTV && FPFV) {
7535 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7536 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7537 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7538 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7539 DAG.getConstant(1, DL, XLenVT));
7540 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7541 }
7542 }
7543
7544 // If the condition is not an integer SETCC which operates on XLenVT, we need
7545 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7546 // (select condv, truev, falsev)
7547 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7548 if (CondV.getOpcode() != ISD::SETCC ||
7549 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7550 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7551 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7552
7553 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7554
7555 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7556 }
7557
7558 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7559 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7560 // advantage of the integer compare+branch instructions. i.e.:
7561 // (select (setcc lhs, rhs, cc), truev, falsev)
7562 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7563 SDValue LHS = CondV.getOperand(0);
7564 SDValue RHS = CondV.getOperand(1);
7565 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7566
7567 // Special case for a select of 2 constants that have a difference of 1.
7568 // Normally this is done by DAGCombine, but if the select is introduced by
7569 // type legalization or op legalization, we miss it. Restricting to SETLT
7570 // case for now because that is what signed saturating add/sub need.
7571 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7572 // but we would probably want to swap the true/false values if the condition
7573 // is SETGE/SETLE to avoid an XORI.
7574 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7575 CCVal == ISD::SETLT) {
7576 const APInt &TrueVal = TrueV->getAsAPIntVal();
7577 const APInt &FalseVal = FalseV->getAsAPIntVal();
7578 if (TrueVal - 1 == FalseVal)
7579 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7580 if (TrueVal + 1 == FalseVal)
7581 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7582 }
7583
7584 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7585 // 1 < x ? x : 1 -> 0 < x ? x : 1
7586 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7587 RHS == TrueV && LHS == FalseV) {
7588 LHS = DAG.getConstant(0, DL, VT);
7589 // 0 <u x is the same as x != 0.
7590 if (CCVal == ISD::SETULT) {
7591 std::swap(LHS, RHS);
7592 CCVal = ISD::SETNE;
7593 }
7594 }
7595
7596 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7597 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7598 RHS == FalseV) {
7599 RHS = DAG.getConstant(0, DL, VT);
7600 }
7601
7602 SDValue TargetCC = DAG.getCondCode(CCVal);
7603
7604 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7605 // (select (setcc lhs, rhs, CC), constant, falsev)
7606 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7607 std::swap(TrueV, FalseV);
7608 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7609 }
7610
7611 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7612 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7613}
7614
7615SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7616 SDValue CondV = Op.getOperand(1);
7617 SDLoc DL(Op);
7618 MVT XLenVT = Subtarget.getXLenVT();
7619
7620 if (CondV.getOpcode() == ISD::SETCC &&
7621 CondV.getOperand(0).getValueType() == XLenVT) {
7622 SDValue LHS = CondV.getOperand(0);
7623 SDValue RHS = CondV.getOperand(1);
7624 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7625
7626 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7627
7628 SDValue TargetCC = DAG.getCondCode(CCVal);
7629 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7630 LHS, RHS, TargetCC, Op.getOperand(2));
7631 }
7632
7633 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7634 CondV, DAG.getConstant(0, DL, XLenVT),
7635 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7636}
7637
7638SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7639 MachineFunction &MF = DAG.getMachineFunction();
7640 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7641
7642 SDLoc DL(Op);
7643 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7644 getPointerTy(MF.getDataLayout()));
7645
7646 // vastart just stores the address of the VarArgsFrameIndex slot into the
7647 // memory location argument.
7648 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7649 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7650 MachinePointerInfo(SV));
7651}
7652
7653SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7654 SelectionDAG &DAG) const {
7655 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7656 MachineFunction &MF = DAG.getMachineFunction();
7657 MachineFrameInfo &MFI = MF.getFrameInfo();
7658 MFI.setFrameAddressIsTaken(true);
7659 Register FrameReg = RI.getFrameRegister(MF);
7660 int XLenInBytes = Subtarget.getXLen() / 8;
7661
7662 EVT VT = Op.getValueType();
7663 SDLoc DL(Op);
7664 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7665 unsigned Depth = Op.getConstantOperandVal(0);
7666 while (Depth--) {
7667 int Offset = -(XLenInBytes * 2);
7668 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7669 DAG.getIntPtrConstant(Offset, DL));
7670 FrameAddr =
7671 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7672 }
7673 return FrameAddr;
7674}
7675
7676SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7677 SelectionDAG &DAG) const {
7678 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7679 MachineFunction &MF = DAG.getMachineFunction();
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 MFI.setReturnAddressIsTaken(true);
7682 MVT XLenVT = Subtarget.getXLenVT();
7683 int XLenInBytes = Subtarget.getXLen() / 8;
7684
7685 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7686 return SDValue();
7687
7688 EVT VT = Op.getValueType();
7689 SDLoc DL(Op);
7690 unsigned Depth = Op.getConstantOperandVal(0);
7691 if (Depth) {
7692 int Off = -XLenInBytes;
7693 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7694 SDValue Offset = DAG.getConstant(Off, DL, VT);
7695 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7696 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7697 MachinePointerInfo());
7698
7699
7700 // Return the value of the return address register, marking it an implicit
7701 // live-in.
7702 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7703 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7704}
7705
7706SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 SDLoc DL(Op);
7709 SDValue Lo = Op.getOperand(0);
7710 SDValue Hi = Op.getOperand(1);
7711 SDValue Shamt = Op.getOperand(2);
7712 EVT VT = Lo.getValueType();
7713
7714 // if Shamt-XLEN < 0: // Shamt < XLEN
7715 // Lo = Lo << Shamt
7716 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7717 // else:
7718 // Lo = 0
7719 // Hi = Lo << (Shamt-XLEN)
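//
// Illustrative example (editor's addition): on RV32, shifting the pair
// Hi:Lo left by Shamt=40 takes the else branch (40 >= XLEN=32), so Lo
// becomes 0 and Hi becomes the original Lo shifted left by 8.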
7720
7721 SDValue Zero = DAG.getConstant(0, DL, VT);
7722 SDValue One = DAG.getConstant(1, DL, VT);
7723 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7724 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7725 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7726 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7727
7728 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7729 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7730 SDValue ShiftRightLo =
7731 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7732 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7733 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7734 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7735
7736 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7737
7738 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7739 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7740
7741 SDValue Parts[2] = {Lo, Hi};
7742 return DAG.getMergeValues(Parts, DL);
7743}
7744
7745SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7746 bool IsSRA) const {
7747 SDLoc DL(Op);
7748 SDValue Lo = Op.getOperand(0);
7749 SDValue Hi = Op.getOperand(1);
7750 SDValue Shamt = Op.getOperand(2);
7751 EVT VT = Lo.getValueType();
7752
7753 // SRA expansion:
7754 // if Shamt-XLEN < 0: // Shamt < XLEN
7755 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7756 // Hi = Hi >>s Shamt
7757 // else:
7758 // Lo = Hi >>s (Shamt-XLEN);
7759 // Hi = Hi >>s (XLEN-1)
7760 //
7761 // SRL expansion:
7762 // if Shamt-XLEN < 0: // Shamt < XLEN
7763 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7764 // Hi = Hi >>u Shamt
7765 // else:
7766 // Lo = Hi >>u (Shamt-XLEN);
7767 // Hi = 0;
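//
// Illustrative example (editor's addition): on RV32, an arithmetic shift
// right of Hi:Lo by Shamt=35 takes the else branch, so Lo = Hi >>s 3 and
// Hi = Hi >>s 31 (all copies of the sign bit).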
7768
7769 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7770
7771 SDValue Zero = DAG.getConstant(0, DL, VT);
7772 SDValue One = DAG.getConstant(1, DL, VT);
7773 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7774 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7775 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7776 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7777
7778 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7779 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7780 SDValue ShiftLeftHi =
7781 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7782 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7783 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7784 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7785 SDValue HiFalse =
7786 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7787
7788 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7789
7790 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7791 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7792
7793 SDValue Parts[2] = {Lo, Hi};
7794 return DAG.getMergeValues(Parts, DL);
7795}
7796
7797// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7798// legal equivalently-sized i8 type, so we can use that as a go-between.
7799SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7800 SelectionDAG &DAG) const {
7801 SDLoc DL(Op);
7802 MVT VT = Op.getSimpleValueType();
7803 SDValue SplatVal = Op.getOperand(0);
7804 // All-zeros or all-ones splats are handled specially.
7805 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7806 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7807 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7808 }
7809 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7810 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7811 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7812 }
7813 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7814 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7815 DAG.getConstant(1, DL, SplatVal.getValueType()));
7816 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7817 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7818 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7819}
7820
7821// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7822// illegal (currently only vXi64 RV32).
7823// FIXME: We could also catch non-constant sign-extended i32 values and lower
7824// them to VMV_V_X_VL.
7825SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc DL(Op);
7828 MVT VecVT = Op.getSimpleValueType();
7829 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7830 "Unexpected SPLAT_VECTOR_PARTS lowering");
7831
7832 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7833 SDValue Lo = Op.getOperand(0);
7834 SDValue Hi = Op.getOperand(1);
7835
7836 MVT ContainerVT = VecVT;
7837 if (VecVT.isFixedLengthVector())
7838 ContainerVT = getContainerForFixedLengthVector(VecVT);
7839
7840 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7841
7842 SDValue Res =
7843 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7844
7845 if (VecVT.isFixedLengthVector())
7846 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7847
7848 return Res;
7849}
7850
7851// Custom-lower extensions from mask vectors by using a vselect either with 1
7852// for zero/any-extension or -1 for sign-extension:
7853// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7854// Note that any-extension is lowered identically to zero-extension.
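// For example (illustrative): (nxv4i32 (sext nxv4i1 %m)) becomes
// (vselect %m, (splat -1), (splat 0)).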
7855SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7856 int64_t ExtTrueVal) const {
7857 SDLoc DL(Op);
7858 MVT VecVT = Op.getSimpleValueType();
7859 SDValue Src = Op.getOperand(0);
7860 // Only custom-lower extensions from mask types
7861 assert(Src.getValueType().isVector() &&
7862 Src.getValueType().getVectorElementType() == MVT::i1);
7863
7864 if (VecVT.isScalableVector()) {
7865 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7866 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7867 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7868 }
7869
7870 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7871 MVT I1ContainerVT =
7872 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7873
7874 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7875
7876 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7877
7878 MVT XLenVT = Subtarget.getXLenVT();
7879 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7880 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7881
7882 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7883 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7884 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7885 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7886 SDValue Select =
7887 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7888 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7889
7890 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7891}
7892
7893SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7894 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7895 MVT ExtVT = Op.getSimpleValueType();
7896 // Only custom-lower extensions from fixed-length vector types.
7897 if (!ExtVT.isFixedLengthVector())
7898 return Op;
7899 MVT VT = Op.getOperand(0).getSimpleValueType();
7900 // Grab the canonical container type for the extended type. Infer the smaller
7901 // type from that to ensure the same number of vector elements, as we know
7902 // the LMUL will be sufficient to hold the smaller type.
7903 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7904 // Get the extended container type manually to ensure the same number of
7905 // vector elements between source and dest.
7906 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7907 ContainerExtVT.getVectorElementCount());
7908
7909 SDValue Op1 =
7910 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7911
7912 SDLoc DL(Op);
7913 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7914
7915 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7916
7917 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7918}
7919
7920// Custom-lower truncations from vectors to mask vectors by using a mask and a
7921// setcc operation:
7922// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
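// Illustrative example: truncation to i1 keeps only bit 0, so an i8
// element of 6 gives (and 6, 1) = 0 -> false, while 5 gives
// (and 5, 1) = 1 -> true.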
7923SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7926 SDLoc DL(Op);
7927 EVT MaskVT = Op.getValueType();
7928 // Only expect to custom-lower truncations to mask types
7929 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7930 "Unexpected type for vector mask lowering");
7931 SDValue Src = Op.getOperand(0);
7932 MVT VecVT = Src.getSimpleValueType();
7933 SDValue Mask, VL;
7934 if (IsVPTrunc) {
7935 Mask = Op.getOperand(1);
7936 VL = Op.getOperand(2);
7937 }
7938 // If this is a fixed vector, we need to convert it to a scalable vector.
7939 MVT ContainerVT = VecVT;
7940
7941 if (VecVT.isFixedLengthVector()) {
7942 ContainerVT = getContainerForFixedLengthVector(VecVT);
7943 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7944 if (IsVPTrunc) {
7945 MVT MaskContainerVT =
7946 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7947 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7948 }
7949 }
7950
7951 if (!IsVPTrunc) {
7952 std::tie(Mask, VL) =
7953 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7954 }
7955
7956 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7957 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7958
7959 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7960 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7961 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7962 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7963
7964 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7965 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7966 DAG.getUNDEF(ContainerVT), Mask, VL);
7967 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7968 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7969 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7970 if (MaskVT.isFixedLengthVector())
7971 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7972 return Trunc;
7973}
7974
7975SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7976 SelectionDAG &DAG) const {
7977 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7978 SDLoc DL(Op);
7979
7980 MVT VT = Op.getSimpleValueType();
7981 // Only custom-lower vector truncates
7982 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7983
7984 // Truncates to mask types are handled differently
7985 if (VT.getVectorElementType() == MVT::i1)
7986 return lowerVectorMaskTruncLike(Op, DAG);
7987
7988 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7989 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7990 // truncate by one power of two at a time.
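// For example (illustrative): truncating nxv2i64 to nxv2i8 is emitted as
// three TRUNCATE_VECTOR_VL steps, i64 -> i32 -> i16 -> i8.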
7991 MVT DstEltVT = VT.getVectorElementType();
7992
7993 SDValue Src = Op.getOperand(0);
7994 MVT SrcVT = Src.getSimpleValueType();
7995 MVT SrcEltVT = SrcVT.getVectorElementType();
7996
7997 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7998 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7999 "Unexpected vector truncate lowering");
8000
8001 MVT ContainerVT = SrcVT;
8002 SDValue Mask, VL;
8003 if (IsVPTrunc) {
8004 Mask = Op.getOperand(1);
8005 VL = Op.getOperand(2);
8006 }
8007 if (SrcVT.isFixedLengthVector()) {
8008 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8009 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8010 if (IsVPTrunc) {
8011 MVT MaskVT = getMaskTypeFor(ContainerVT);
8012 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8013 }
8014 }
8015
8016 SDValue Result = Src;
8017 if (!IsVPTrunc) {
8018 std::tie(Mask, VL) =
8019 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8020 }
8021
8022 LLVMContext &Context = *DAG.getContext();
8023 const ElementCount Count = ContainerVT.getVectorElementCount();
8024 do {
8025 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8026 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8027 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8028 Mask, VL);
8029 } while (SrcEltVT != DstEltVT);
8030
8031 if (SrcVT.isFixedLengthVector())
8032 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8033
8034 return Result;
8035}
8036
8037SDValue
8038RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8039 SelectionDAG &DAG) const {
8040 SDLoc DL(Op);
8041 SDValue Chain = Op.getOperand(0);
8042 SDValue Src = Op.getOperand(1);
8043 MVT VT = Op.getSimpleValueType();
8044 MVT SrcVT = Src.getSimpleValueType();
8045 MVT ContainerVT = VT;
8046 if (VT.isFixedLengthVector()) {
8047 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8048 ContainerVT =
8049 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8050 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8051 }
8052
8053 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8054
8055 // RVV can only widen/truncate fp to types double/half the size of the source.
8056 if ((VT.getVectorElementType() == MVT::f64 &&
8057 SrcVT.getVectorElementType() == MVT::f16) ||
8058 (VT.getVectorElementType() == MVT::f16 &&
8059 SrcVT.getVectorElementType() == MVT::f64)) {
8060 // For double rounding, the intermediate rounding should be round-to-odd.
8061 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8062 ? RISCVISD::STRICT_FP_EXTEND_VL
8063 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8064 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8065 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8066 Chain, Src, Mask, VL);
8067 Chain = Src.getValue(1);
8068 }
8069
8070 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8071 ? RISCVISD::STRICT_FP_EXTEND_VL
8072 : RISCVISD::STRICT_FP_ROUND_VL;
8073 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8074 Chain, Src, Mask, VL);
8075 if (VT.isFixedLengthVector()) {
8076 // StrictFP operations have two result values. Their lowered result should
8077 // have the same result count.
8078 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8079 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8080 }
8081 return Res;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 bool IsVP =
8088 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8089 bool IsExtend =
8090 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8091 // RVV can only truncate fp to types half the size of the source. We
8092 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8093 // conversion instruction.
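// For example (illustrative): an nxv2f64 -> nxv2f16 round is emitted as
// nxv2f64 -> nxv2f32 with round-to-odd, followed by nxv2f32 -> nxv2f16.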
8094 SDLoc DL(Op);
8095 MVT VT = Op.getSimpleValueType();
8096
8097 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8098
8099 SDValue Src = Op.getOperand(0);
8100 MVT SrcVT = Src.getSimpleValueType();
8101
8102 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8103 SrcVT.getVectorElementType() != MVT::f16);
8104 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8105 SrcVT.getVectorElementType() != MVT::f64);
8106
8107 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8108
8109 // Prepare any fixed-length vector operands.
8110 MVT ContainerVT = VT;
8111 SDValue Mask, VL;
8112 if (IsVP) {
8113 Mask = Op.getOperand(1);
8114 VL = Op.getOperand(2);
8115 }
8116 if (VT.isFixedLengthVector()) {
8117 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8118 ContainerVT =
8119 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8120 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8121 if (IsVP) {
8122 MVT MaskVT = getMaskTypeFor(ContainerVT);
8123 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8124 }
8125 }
8126
8127 if (!IsVP)
8128 std::tie(Mask, VL) =
8129 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8130
8131 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8132
8133 if (IsDirectConv) {
8134 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8135 if (VT.isFixedLengthVector())
8136 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8137 return Src;
8138 }
8139
8140 unsigned InterConvOpc =
8141 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8142
8143 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8144 SDValue IntermediateConv =
8145 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8146 SDValue Result =
8147 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8148 if (VT.isFixedLengthVector())
8149 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8150 return Result;
8151}
8152
8153// Given a scalable vector type and an index into it, returns the type for the
8154// smallest subvector that the index fits in. This can be used to reduce LMUL
8155// for operations like vslidedown.
8156//
8157// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8158static std::optional<MVT>
8159getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8160 const RISCVSubtarget &Subtarget) {
8161 assert(VecVT.isScalableVector());
8162 const unsigned EltSize = VecVT.getScalarSizeInBits();
8163 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8164 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8165 MVT SmallerVT;
8166 if (MaxIdx < MinVLMAX)
8167 SmallerVT = getLMUL1VT(VecVT);
8168 else if (MaxIdx < MinVLMAX * 2)
8169 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8170 else if (MaxIdx < MinVLMAX * 4)
8171 SmallerVT = getLMUL1VT(VecVT)
8172 .getDoubleNumVectorElementsVT()
8173 .getDoubleNumVectorElementsVT();
8174 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8175 return std::nullopt;
8176 return SmallerVT;
8177}
8178
8179// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8180// first position of a vector, and that vector is slid up to the insert index.
8181// By limiting the active vector length to index+1 and merging with the
8182// original vector (with an undisturbed tail policy for elements >= VL), we
8183// achieve the desired result of leaving all elements untouched except the one
8184// at VL-1, which is replaced with the desired value.
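// For example (illustrative): inserting at index 2 places the scalar at
// element 0, then slides it up by 2 with VL=3; the tail-undisturbed
// policy leaves elements at indices >= 3 unchanged.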
8185SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8186 SelectionDAG &DAG) const {
8187 SDLoc DL(Op);
8188 MVT VecVT = Op.getSimpleValueType();
8189 SDValue Vec = Op.getOperand(0);
8190 SDValue Val = Op.getOperand(1);
8191 SDValue Idx = Op.getOperand(2);
8192
8193 if (VecVT.getVectorElementType() == MVT::i1) {
8194 // FIXME: For now we just promote to an i8 vector and insert into that,
8195 // but this is probably not optimal.
8196 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8197 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8198 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8199 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8200 }
8201
8202 MVT ContainerVT = VecVT;
8203 // If the operand is a fixed-length vector, convert to a scalable one.
8204 if (VecVT.isFixedLengthVector()) {
8205 ContainerVT = getContainerForFixedLengthVector(VecVT);
8206 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8207 }
8208
8209 // If we know the index we're going to insert at, we can shrink Vec so that
8210 // we're performing the scalar inserts and slideup on a smaller LMUL.
8211 MVT OrigContainerVT = ContainerVT;
8212 SDValue OrigVec = Vec;
8213 SDValue AlignedIdx;
8214 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8215 const unsigned OrigIdx = IdxC->getZExtValue();
8216 // Do we know an upper bound on LMUL?
8217 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8218 DL, DAG, Subtarget)) {
8219 ContainerVT = *ShrunkVT;
8220 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8221 }
8222
8223 // If we're compiling for an exact VLEN value, we can always perform
8224 // the insert in m1 as we can determine the register corresponding to
8225 // the index in the register group.
8226 const MVT M1VT = getLMUL1VT(ContainerVT);
8227 if (auto VLEN = Subtarget.getRealVLen();
8228 VLEN && ContainerVT.bitsGT(M1VT)) {
8229 EVT ElemVT = VecVT.getVectorElementType();
8230 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8231 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8232 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8233 unsigned ExtractIdx =
8234 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8235 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8236 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8237 ContainerVT = M1VT;
8238 }
8239
8240 if (AlignedIdx)
8241 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8242 AlignedIdx);
8243 }
8244
8245 MVT XLenVT = Subtarget.getXLenVT();
8246
8247 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8248 // Even i64-element vectors on RV32 can be lowered without scalar
8249 // legalization if the most-significant 32 bits of the value are not affected
8250 // by the sign-extension of the lower 32 bits.
8251 // TODO: We could also catch sign extensions of a 32-bit value.
8252 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8253 const auto *CVal = cast<ConstantSDNode>(Val);
8254 if (isInt<32>(CVal->getSExtValue())) {
8255 IsLegalInsert = true;
8256 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8257 }
8258 }
8259
8260 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8261
8262 SDValue ValInVec;
8263
8264 if (IsLegalInsert) {
8265 unsigned Opc =
8266 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8267 if (isNullConstant(Idx)) {
8268 if (!VecVT.isFloatingPoint())
8269 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8270 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8271
8272 if (AlignedIdx)
8273 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8274 Vec, AlignedIdx);
8275 if (!VecVT.isFixedLengthVector())
8276 return Vec;
8277 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8278 }
8279 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8280 } else {
8281 // On RV32, i64-element vectors must be specially handled to place the
8282 // value at element 0, by using two vslide1down instructions in sequence on
8283 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8284 // this.
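// Conceptually, on the i32 view of the vector with VL limited to 2:
//   vslide1down.vx vTmp, vSrc, lo     ; low 32 bits enter at the top
//   vslide1down.vx vTmp, vTmp, hi     ; high 32 bits follow
// which leaves {lo, hi} in the first two i32 lanes, i.e. the i64 value in
// element 0 once the result is bitcast back.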
8285 SDValue ValLo, ValHi;
8286 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8287 MVT I32ContainerVT =
8288 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8289 SDValue I32Mask =
8290 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8291 // Limit the active VL to two.
8292 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8293 // If the Idx is 0 we can insert directly into the vector.
8294 if (isNullConstant(Idx)) {
8295 // First slide in the lo value, then the hi value above it. We use slide1down
8296 // to avoid the register group overlap constraint of vslide1up.
8297 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8298 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8299 // If the source vector is undef don't pass along the tail elements from
8300 // the previous slide1down.
8301 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8302 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8303 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8304 // Bitcast back to the right container type.
8305 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8306
8307 if (AlignedIdx)
8308 ValInVec =
8309 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8310 ValInVec, AlignedIdx);
8311 if (!VecVT.isFixedLengthVector())
8312 return ValInVec;
8313 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8314 }
8315
8316 // First slide in the lo value, then the hi value above it. We use slide1down
8317 // to avoid the register group overlap constraint of vslide1up.
8318 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8319 DAG.getUNDEF(I32ContainerVT),
8320 DAG.getUNDEF(I32ContainerVT), ValLo,
8321 I32Mask, InsertI64VL);
8322 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8323 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8324 I32Mask, InsertI64VL);
8325 // Bitcast back to the right container type.
8326 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8327 }
8328
8329 // Now that the value is in a vector, slide it into position.
8330 SDValue InsertVL =
8331 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8332
8333 // Use tail agnostic policy if Idx is the last index of Vec.
8334 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8335 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8336 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8337 Policy = RISCVII::TAIL_AGNOSTIC;
8338 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8339 Idx, Mask, InsertVL, Policy);
8340
8341 if (AlignedIdx)
8342 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8343 Slideup, AlignedIdx);
8344 if (!VecVT.isFixedLengthVector())
8345 return Slideup;
8346 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8347}
8348
8349// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8350// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8351// types this is done using VMV_X_S to allow us to glean information about the
8352// sign bits of the result.
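// For a non-zero index the result is conceptually (exact codegen depends on
// the type, index and LMUL):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vx v8, v8, a0
//   vmv.x.s a0, v8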
8353SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8354 SelectionDAG &DAG) const {
8355 SDLoc DL(Op);
8356 SDValue Idx = Op.getOperand(1);
8357 SDValue Vec = Op.getOperand(0);
8358 EVT EltVT = Op.getValueType();
8359 MVT VecVT = Vec.getSimpleValueType();
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 if (VecVT.getVectorElementType() == MVT::i1) {
8363 // Use vfirst.m to extract the first bit.
8364 if (isNullConstant(Idx)) {
8365 MVT ContainerVT = VecVT;
8366 if (VecVT.isFixedLengthVector()) {
8367 ContainerVT = getContainerForFixedLengthVector(VecVT);
8368 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8369 }
8370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8371 SDValue Vfirst =
8372 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8373 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8374 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8375 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8376 }
8377 if (VecVT.isFixedLengthVector()) {
8378 unsigned NumElts = VecVT.getVectorNumElements();
8379 if (NumElts >= 8) {
8380 MVT WideEltVT;
8381 unsigned WidenVecLen;
8382 SDValue ExtractElementIdx;
8383 SDValue ExtractBitIdx;
8384 unsigned MaxEEW = Subtarget.getELen();
8385 MVT LargestEltVT = MVT::getIntegerVT(
8386 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8387 if (NumElts <= LargestEltVT.getSizeInBits()) {
8388 assert(isPowerOf2_32(NumElts) &&
8389 "the number of elements should be power of 2");
8390 WideEltVT = MVT::getIntegerVT(NumElts);
8391 WidenVecLen = 1;
8392 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8393 ExtractBitIdx = Idx;
8394 } else {
8395 WideEltVT = LargestEltVT;
8396 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8397 // extract element index = index / element width
8398 ExtractElementIdx = DAG.getNode(
8399 ISD::SRL, DL, XLenVT, Idx,
8400 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8401 // mask bit index = index % element width
8402 ExtractBitIdx = DAG.getNode(
8403 ISD::AND, DL, XLenVT, Idx,
8404 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8405 }
8406 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8407 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8408 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8409 Vec, ExtractElementIdx);
8410 // Extract the bit from GPR.
8411 SDValue ShiftRight =
8412 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8413 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8414 DAG.getConstant(1, DL, XLenVT));
8415 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8416 }
8417 }
8418 // Otherwise, promote to an i8 vector and extract from that.
8419 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8420 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8421 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8422 }
8423
8424 // If this is a fixed vector, we need to convert it to a scalable vector.
8425 MVT ContainerVT = VecVT;
8426 if (VecVT.isFixedLengthVector()) {
8427 ContainerVT = getContainerForFixedLengthVector(VecVT);
8428 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8429 }
8430
8431 // If we're compiling for an exact VLEN value and we have a known
8432 // constant index, we can always perform the extract in m1 (or
8433 // smaller) as we can determine the register corresponding to
8434 // the index in the register group.
8435 const auto VLen = Subtarget.getRealVLen();
8436 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8437 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8438 MVT M1VT = getLMUL1VT(ContainerVT);
8439 unsigned OrigIdx = IdxC->getZExtValue();
8440 EVT ElemVT = VecVT.getVectorElementType();
8441 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8442 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8443 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8444 unsigned ExtractIdx =
8445 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8446 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8447 DAG.getVectorIdxConstant(ExtractIdx, DL));
8448 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8449 ContainerVT = M1VT;
8450 }
8451
8452 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8453 // contains our index.
8454 std::optional<uint64_t> MaxIdx;
8455 if (VecVT.isFixedLengthVector())
8456 MaxIdx = VecVT.getVectorNumElements() - 1;
8457 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8458 MaxIdx = IdxC->getZExtValue();
8459 if (MaxIdx) {
8460 if (auto SmallerVT =
8461 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8462 ContainerVT = *SmallerVT;
8463 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8464 DAG.getConstant(0, DL, XLenVT));
8465 }
8466 }
8467
8468 // If after narrowing, the required slide is still greater than LMUL2,
8469 // fallback to generic expansion and go through the stack. This is done
8470 // for a subtle reason: extracting *all* elements out of a vector is
8471 // widely expected to be linear in vector size, but because vslidedown
8472 // is linear in LMUL, performing N extracts using vslidedown becomes
8473 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8474 // seems to have the same problem (the store is linear in LMUL), but the
8475 // generic expansion *memoizes* the store, and thus for many extracts of
8476 // the same vector we end up with one store and a bunch of loads.
8477 // TODO: We don't have the same code for insert_vector_elt because we
8478 // have BUILD_VECTOR and handle the degenerate case there. Should we
8479 // consider adding an inverse BUILD_VECTOR node?
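// As a rough illustration of the costs involved: extracting all N elements
// of an m8 vector with N vslidedowns moves up to eight vector registers per
// slide, which is quadratic overall, while the stack expansion is one m8
// store plus N scalar loads.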
8480 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8481 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8482 return SDValue();
8483
8484 // If the index is 0, the vector is already in the right position.
8485 if (!isNullConstant(Idx)) {
8486 // Use a VL of 1 to avoid processing more elements than we need.
8487 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8488 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8489 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8490 }
8491
8492 if (!EltVT.isInteger()) {
8493 // Floating-point extracts are handled in TableGen.
8494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8495 DAG.getVectorIdxConstant(0, DL));
8496 }
8497
8498 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8499 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8500}
8501
8502// Some RVV intrinsics may claim that they want an integer operand to be
8503// promoted or expanded.
8504 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8505 const RISCVSubtarget &Subtarget) {
8506 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8507 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8509 "Unexpected opcode");
8510
8511 if (!Subtarget.hasVInstructions())
8512 return SDValue();
8513
8514 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8515 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8517
8518 SDLoc DL(Op);
8519
8520 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8521 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8522 if (!II || !II->hasScalarOperand())
8523 return SDValue();
8524
8525 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8526 assert(SplatOp < Op.getNumOperands());
8527
8528 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8529 SDValue &ScalarOp = Operands[SplatOp];
8530 MVT OpVT = ScalarOp.getSimpleValueType();
8531 MVT XLenVT = Subtarget.getXLenVT();
8532
8533 // If this isn't a scalar, or its type is XLenVT we're done.
8534 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8535 return SDValue();
8536
8537 // Simplest case is that the operand needs to be promoted to XLenVT.
8538 if (OpVT.bitsLT(XLenVT)) {
8539 // If the operand is a constant, sign extend to increase our chances
8540 // of being able to use a .vi instruction. ANY_EXTEND would become a
8541 // zero extend and the simm5 check in isel would fail.
8542 // FIXME: Should we ignore the upper bits in isel instead?
8543 unsigned ExtOpc =
8544 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8545 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8546 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8547 }
8548
8549 // Use the previous operand to get the vXi64 VT. The result might be a mask
8550 // VT for compares. Using the previous operand assumes that the previous
8551 // operand will never have a smaller element size than a scalar operand and
8552 // that a widening operation never uses SEW=64.
8553 // NOTE: If this fails the below assert, we can probably just find the
8554 // element count from any operand or result and use it to construct the VT.
8555 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8556 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8557
8558 // The more complex case is when the scalar is larger than XLenVT.
8559 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8560 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8561
8562 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8563 // instruction to sign-extend since SEW>XLEN.
8564 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8565 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8566 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8567 }
8568
8569 switch (IntNo) {
8570 case Intrinsic::riscv_vslide1up:
8571 case Intrinsic::riscv_vslide1down:
8572 case Intrinsic::riscv_vslide1up_mask:
8573 case Intrinsic::riscv_vslide1down_mask: {
8574 // We need to special case these when the scalar is larger than XLen.
8575 unsigned NumOps = Op.getNumOperands();
8576 bool IsMasked = NumOps == 7;
8577
8578 // Convert the vector source to the equivalent nxvXi32 vector.
8579 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8580 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8581 SDValue ScalarLo, ScalarHi;
8582 std::tie(ScalarLo, ScalarHi) =
8583 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8584
8585 // Double the VL since we halved SEW.
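// E.g. a requested AVL of 4 on an nxv2i64 source becomes VL=8 on the
// equivalent nxv4i32 view.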
8586 SDValue AVL = getVLOperand(Op);
8587 SDValue I32VL;
8588
8589 // Optimize for constant AVL
8590 if (isa<ConstantSDNode>(AVL)) {
8591 const auto [MinVLMAX, MaxVLMAX] =
8592 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8593
8594 uint64_t AVLInt = AVL->getAsZExtVal();
8595 if (AVLInt <= MinVLMAX) {
8596 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8597 } else if (AVLInt >= 2 * MaxVLMAX) {
8598 // Just set vl to VLMAX in this situation
8599 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8600 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8601 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8602 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8603 SDValue SETVLMAX = DAG.getTargetConstant(
8604 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8605 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8606 LMUL);
8607 } else {
8608 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8609 // depends on the hardware implementation, so let the following
8610 // code compute it with vsetvli.
8611 }
8612 }
8613 if (!I32VL) {
8614 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8615 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8616 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8617 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8618 SDValue SETVL =
8619 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8620 // Use the vsetvli instruction to get the actual VL in use, which depends
8621 // on the hardware implementation.
8622 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8623 SEW, LMUL);
8624 I32VL =
8625 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8626 }
8627
8628 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8629
8630 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8631 // instructions.
8632 SDValue Passthru;
8633 if (IsMasked)
8634 Passthru = DAG.getUNDEF(I32VT);
8635 else
8636 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8637
8638 if (IntNo == Intrinsic::riscv_vslide1up ||
8639 IntNo == Intrinsic::riscv_vslide1up_mask) {
8640 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8641 ScalarHi, I32Mask, I32VL);
8642 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8643 ScalarLo, I32Mask, I32VL);
8644 } else {
8645 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8646 ScalarLo, I32Mask, I32VL);
8647 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8648 ScalarHi, I32Mask, I32VL);
8649 }
8650
8651 // Convert back to nxvXi64.
8652 Vec = DAG.getBitcast(VT, Vec);
8653
8654 if (!IsMasked)
8655 return Vec;
8656 // Apply mask after the operation.
8657 SDValue Mask = Operands[NumOps - 3];
8658 SDValue MaskedOff = Operands[1];
8659 // Assume Policy operand is the last operand.
8660 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8661 // We don't need to select maskedoff if it's undef.
8662 if (MaskedOff.isUndef())
8663 return Vec;
8664 // TAMU
8665 if (Policy == RISCVII::TAIL_AGNOSTIC)
8666 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8667 DAG.getUNDEF(VT), AVL);
8668 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8669 // It's fine because vmerge does not care about the mask policy.
8670 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8671 MaskedOff, AVL);
8672 }
8673 }
8674
8675 // We need to convert the scalar to a splat vector.
8676 SDValue VL = getVLOperand(Op);
8677 assert(VL.getValueType() == XLenVT);
8678 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8679 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8680}
8681
8682// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8683// scalable vector llvm.get.vector.length for now.
8684//
8685// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8686// (vscale * VF). The vscale and VF are independent of element width. We use
8687// SEW=8 for the vsetvli because it is the only element width that supports all
8688 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8689 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8690// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8691// SEW and LMUL are better for the surrounding vector instructions.
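// As a worked example: with RVVBitsPerBlock=64 and ElementWidth=8, LMul1VF
// is 8, so a scalable VF of 4 is fractional with LMulVal=2 and the intrinsic
// lowers to roughly "vsetvli a0, a0, e8, mf2, ta, ma".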
8692 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8693 const RISCVSubtarget &Subtarget) {
8694 MVT XLenVT = Subtarget.getXLenVT();
8695
8696 // The smallest LMUL is only valid for the smallest element width.
8697 const unsigned ElementWidth = 8;
8698
8699 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8700 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8701 // We don't support VF==1 with ELEN==32.
8702 [[maybe_unused]] unsigned MinVF =
8703 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8704
8705 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8706 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8707 "Unexpected VF");
8708
8709 bool Fractional = VF < LMul1VF;
8710 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8711 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8712 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8713
8714 SDLoc DL(N);
8715
8716 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8717 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8718
8719 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8720
8721 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8722 SDValue Res =
8723 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8724 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8725}
8726
8727 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8728 const RISCVSubtarget &Subtarget) {
8729 SDValue Op0 = N->getOperand(1);
8730 MVT OpVT = Op0.getSimpleValueType();
8731 MVT ContainerVT = OpVT;
8732 if (OpVT.isFixedLengthVector()) {
8733 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8734 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8735 }
8736 MVT XLenVT = Subtarget.getXLenVT();
8737 SDLoc DL(N);
8738 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8739 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8740 if (isOneConstant(N->getOperand(2)))
8741 return Res;
8742
8743 // Convert -1 to VL.
8744 SDValue Setcc =
8745 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8746 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8747 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8748}
8749
8750static inline void promoteVCIXScalar(const SDValue &Op,
8751 SmallVectorImpl<SDValue> &Operands,
8752 SelectionDAG &DAG) {
8753 const RISCVSubtarget &Subtarget =
8754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8755
8756 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8758 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8759 SDLoc DL(Op);
8760
8761 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8762 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8763 if (!II || !II->hasScalarOperand())
8764 return;
8765
8766 unsigned SplatOp = II->ScalarOperand + 1;
8767 assert(SplatOp < Op.getNumOperands());
8768
8769 SDValue &ScalarOp = Operands[SplatOp];
8770 MVT OpVT = ScalarOp.getSimpleValueType();
8771 MVT XLenVT = Subtarget.getXLenVT();
8772
8773 // The code below is partially copied from lowerVectorIntrinsicScalars.
8774 // If this isn't a scalar, or its type is XLenVT we're done.
8775 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8776 return;
8777
8778 // Manually emit the promote operation for the scalar operand.
8779 if (OpVT.bitsLT(XLenVT)) {
8780 unsigned ExtOpc =
8781 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8782 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8783 }
8784
8785 return;
8786}
8787
8788static void processVCIXOperands(SDValue &OrigOp,
8789 SmallVectorImpl<SDValue> &Operands,
8790 SelectionDAG &DAG) {
8791 promoteVCIXScalar(OrigOp, Operands, DAG);
8792 const RISCVSubtarget &Subtarget =
8793 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8794 for (SDValue &V : Operands) {
8795 EVT ValType = V.getValueType();
8796 if (ValType.isVector() && ValType.isFloatingPoint()) {
8797 MVT InterimIVT =
8798 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8799 ValType.getVectorElementCount());
8800 V = DAG.getBitcast(InterimIVT, V);
8801 }
8802 if (ValType.isFixedLengthVector()) {
8803 MVT OpContainerVT = getContainerForFixedLengthVector(
8804 DAG, V.getSimpleValueType(), Subtarget);
8805 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8806 }
8807 }
8808}
8809
8810// LMUL * VLEN should be greater than or equal to EGS * SEW
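// E.g. with a minimum VLEN of 128, an element group with EGS=4 and SEW=32
// needs LMUL * VLEN >= 128: nxv2i32 (LMUL1) and larger qualify, while
// nxv1i32 (LMUL 1/2) only guarantees 64 bits and is rejected.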
8811static inline bool isValidEGW(int EGS, EVT VT,
8812 const RISCVSubtarget &Subtarget) {
8813 return (Subtarget.getRealMinVLen() *
8814 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8815 EGS * VT.getScalarSizeInBits();
8816}
8817
8818SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 unsigned IntNo = Op.getConstantOperandVal(0);
8821 SDLoc DL(Op);
8822 MVT XLenVT = Subtarget.getXLenVT();
8823
8824 switch (IntNo) {
8825 default:
8826 break; // Don't custom lower most intrinsics.
8827 case Intrinsic::thread_pointer: {
8828 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8829 return DAG.getRegister(RISCV::X4, PtrVT);
8830 }
8831 case Intrinsic::riscv_orc_b:
8832 case Intrinsic::riscv_brev8:
8833 case Intrinsic::riscv_sha256sig0:
8834 case Intrinsic::riscv_sha256sig1:
8835 case Intrinsic::riscv_sha256sum0:
8836 case Intrinsic::riscv_sha256sum1:
8837 case Intrinsic::riscv_sm3p0:
8838 case Intrinsic::riscv_sm3p1: {
8839 unsigned Opc;
8840 switch (IntNo) {
8841 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8842 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8843 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8844 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8845 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8846 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8847 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8848 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8849 }
8850
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8855 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8856 }
8857
8858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8859 }
8860 case Intrinsic::riscv_sm4ks:
8861 case Intrinsic::riscv_sm4ed: {
8862 unsigned Opc =
8863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8864
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res =
8871 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8873 }
8874
8875 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8876 Op.getOperand(3));
8877 }
8878 case Intrinsic::riscv_zip:
8879 case Intrinsic::riscv_unzip: {
8880 unsigned Opc =
8881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8882 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8883 }
8884 case Intrinsic::riscv_mopr: {
8885 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8886 SDValue NewOp =
8887 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8888 SDValue Res = DAG.getNode(
8889 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8890 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8894 Op.getOperand(2));
8895 }
8896
8897 case Intrinsic::riscv_moprr: {
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp0 =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue NewOp1 =
8902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8903 SDValue Res = DAG.getNode(
8904 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8905 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8906 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8907 }
8908 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8909 Op.getOperand(2), Op.getOperand(3));
8910 }
8911 case Intrinsic::riscv_clmul:
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8918 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8919 }
8920 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8921 Op.getOperand(2));
8922 case Intrinsic::riscv_clmulh:
8923 case Intrinsic::riscv_clmulr: {
8924 unsigned Opc =
8925 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8926 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8927 SDValue NewOp0 =
8928 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8929 SDValue NewOp1 =
8930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8931 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8932 DAG.getConstant(32, DL, MVT::i64));
8933 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8934 DAG.getConstant(32, DL, MVT::i64));
8935 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8936 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940
8941 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8942 }
8943 case Intrinsic::experimental_get_vector_length:
8944 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8945 case Intrinsic::experimental_cttz_elts:
8946 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8947 case Intrinsic::riscv_vmv_x_s: {
8948 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8949 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8950 }
8951 case Intrinsic::riscv_vfmv_f_s:
8952 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8953 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8954 case Intrinsic::riscv_vmv_v_x:
8955 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8956 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8957 Subtarget);
8958 case Intrinsic::riscv_vfmv_v_f:
8959 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8960 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8961 case Intrinsic::riscv_vmv_s_x: {
8962 SDValue Scalar = Op.getOperand(2);
8963
8964 if (Scalar.getValueType().bitsLE(XLenVT)) {
8965 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8966 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8967 Op.getOperand(1), Scalar, Op.getOperand(3));
8968 }
8969
8970 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8971
8972 // This is an i64 value that lives in two scalar registers. We have to
8973 // insert this in a convoluted way. First we build a vXi64 splat containing
8974 // the two values that we assemble using some bit math. Next we'll use
8975 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8976 // to merge element 0 from our splat into the source vector.
8977 // FIXME: This is probably not the best way to do this, but it is
8978 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8979 // point.
8980 // sw lo, (a0)
8981 // sw hi, 4(a0)
8982 // vlse vX, (a0)
8983 //
8984 // vid.v vVid
8985 // vmseq.vx mMask, vVid, 0
8986 // vmerge.vvm vDest, vSrc, vVal, mMask
8987 MVT VT = Op.getSimpleValueType();
8988 SDValue Vec = Op.getOperand(1);
8989 SDValue VL = getVLOperand(Op);
8990
8991 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8992 if (Op.getOperand(1).isUndef())
8993 return SplattedVal;
8994 SDValue SplattedIdx =
8995 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8996 DAG.getConstant(0, DL, MVT::i32), VL);
8997
8998 MVT MaskVT = getMaskTypeFor(VT);
8999 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9000 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9001 SDValue SelectCond =
9002 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9003 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9004 DAG.getUNDEF(MaskVT), Mask, VL});
9005 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9006 Vec, DAG.getUNDEF(VT), VL);
9007 }
9008 case Intrinsic::riscv_vfmv_s_f:
9009 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9010 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9011 // EGS * EEW >= 128 bits
9012 case Intrinsic::riscv_vaesdf_vv:
9013 case Intrinsic::riscv_vaesdf_vs:
9014 case Intrinsic::riscv_vaesdm_vv:
9015 case Intrinsic::riscv_vaesdm_vs:
9016 case Intrinsic::riscv_vaesef_vv:
9017 case Intrinsic::riscv_vaesef_vs:
9018 case Intrinsic::riscv_vaesem_vv:
9019 case Intrinsic::riscv_vaesem_vs:
9020 case Intrinsic::riscv_vaeskf1:
9021 case Intrinsic::riscv_vaeskf2:
9022 case Intrinsic::riscv_vaesz_vs:
9023 case Intrinsic::riscv_vsm4k:
9024 case Intrinsic::riscv_vsm4r_vv:
9025 case Intrinsic::riscv_vsm4r_vs: {
9026 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9027 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9028 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9029 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9030 return Op;
9031 }
9032 // EGS * EEW >= 256 bits
9033 case Intrinsic::riscv_vsm3c:
9034 case Intrinsic::riscv_vsm3me: {
9035 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9036 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9037 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9038 return Op;
9039 }
9040 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9041 case Intrinsic::riscv_vsha2ch:
9042 case Intrinsic::riscv_vsha2cl:
9043 case Intrinsic::riscv_vsha2ms: {
9044 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9045 !Subtarget.hasStdExtZvknhb())
9046 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9047 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9048 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9049 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9050 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9051 return Op;
9052 }
9053 case Intrinsic::riscv_sf_vc_v_x:
9054 case Intrinsic::riscv_sf_vc_v_i:
9055 case Intrinsic::riscv_sf_vc_v_xv:
9056 case Intrinsic::riscv_sf_vc_v_iv:
9057 case Intrinsic::riscv_sf_vc_v_vv:
9058 case Intrinsic::riscv_sf_vc_v_fv:
9059 case Intrinsic::riscv_sf_vc_v_xvv:
9060 case Intrinsic::riscv_sf_vc_v_ivv:
9061 case Intrinsic::riscv_sf_vc_v_vvv:
9062 case Intrinsic::riscv_sf_vc_v_fvv:
9063 case Intrinsic::riscv_sf_vc_v_xvw:
9064 case Intrinsic::riscv_sf_vc_v_ivw:
9065 case Intrinsic::riscv_sf_vc_v_vvw:
9066 case Intrinsic::riscv_sf_vc_v_fvw: {
9067 MVT VT = Op.getSimpleValueType();
9068
9069 SmallVector<SDValue> Operands{Op->op_values()};
9070 processVCIXOperands(Op, Operands, DAG);
9071
9072 MVT RetVT = VT;
9073 if (VT.isFixedLengthVector())
9074 RetVT = getContainerForFixedLengthVector(VT);
9075 else if (VT.isFloatingPoint())
9076 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9077 VT.getVectorElementCount());
9078
9079 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9080
9081 if (VT.isFixedLengthVector())
9082 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9083 else if (VT.isFloatingPoint())
9084 NewNode = DAG.getBitcast(VT, NewNode);
9085
9086 if (Op == NewNode)
9087 break;
9088
9089 return NewNode;
9090 }
9091 }
9092
9093 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9094}
9095
9097 unsigned Type) {
9098 SDLoc DL(Op);
9099 SmallVector<SDValue> Operands{Op->op_values()};
9100 Operands.erase(Operands.begin() + 1);
9101
9102 const RISCVSubtarget &Subtarget =
9103 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9104 MVT VT = Op.getSimpleValueType();
9105 MVT RetVT = VT;
9106 MVT FloatVT = VT;
9107
9108 if (VT.isFloatingPoint()) {
9109 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9110 VT.getVectorElementCount());
9111 FloatVT = RetVT;
9112 }
9113 if (VT.isFixedLengthVector())
9115 Subtarget);
9116
9117 processVCIXOperands(Op, Operands, DAG);
9118
9119 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9120 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9121 SDValue Chain = NewNode.getValue(1);
9122
9123 if (VT.isFixedLengthVector())
9124 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9125 if (VT.isFloatingPoint())
9126 NewNode = DAG.getBitcast(VT, NewNode);
9127
9128 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9129
9130 return NewNode;
9131}
9132
9134 unsigned Type) {
9135 SmallVector<SDValue> Operands{Op->op_values()};
9136 Operands.erase(Operands.begin() + 1);
9137 processVCIXOperands(Op, Operands, DAG);
9138
9139 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9140}
9141
9142SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9143 SelectionDAG &DAG) const {
9144 unsigned IntNo = Op.getConstantOperandVal(1);
9145 switch (IntNo) {
9146 default:
9147 break;
9148 case Intrinsic::riscv_masked_strided_load: {
9149 SDLoc DL(Op);
9150 MVT XLenVT = Subtarget.getXLenVT();
9151
9152 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9153 // the selection of the masked intrinsics doesn't do this for us.
9154 SDValue Mask = Op.getOperand(5);
9155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9156
9157 MVT VT = Op->getSimpleValueType(0);
9158 MVT ContainerVT = VT;
9159 if (VT.isFixedLengthVector())
9160 ContainerVT = getContainerForFixedLengthVector(VT);
9161
9162 SDValue PassThru = Op.getOperand(2);
9163 if (!IsUnmasked) {
9164 MVT MaskVT = getMaskTypeFor(ContainerVT);
9165 if (VT.isFixedLengthVector()) {
9166 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9167 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9168 }
9169 }
9170
9171 auto *Load = cast<MemIntrinsicSDNode>(Op);
9172 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9173 SDValue Ptr = Op.getOperand(3);
9174 SDValue Stride = Op.getOperand(4);
9175 SDValue Result, Chain;
9176
9177 // TODO: We restrict this to unmasked loads currently in consideration of
9178 // the complexity of handling all-false masks.
9179 MVT ScalarVT = ContainerVT.getVectorElementType();
9180 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9181 SDValue ScalarLoad =
9182 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9183 ScalarVT, Load->getMemOperand());
9184 Chain = ScalarLoad.getValue(1);
9185 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9186 Subtarget);
9187 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9188 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9189 Load->getMemOperand());
9190 Chain = ScalarLoad.getValue(1);
9191 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9192 } else {
9193 SDValue IntID = DAG.getTargetConstant(
9194 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9195 XLenVT);
9196
9197 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9198 if (IsUnmasked)
9199 Ops.push_back(DAG.getUNDEF(ContainerVT));
9200 else
9201 Ops.push_back(PassThru);
9202 Ops.push_back(Ptr);
9203 Ops.push_back(Stride);
9204 if (!IsUnmasked)
9205 Ops.push_back(Mask);
9206 Ops.push_back(VL);
9207 if (!IsUnmasked) {
9208 SDValue Policy =
9209 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9210 Ops.push_back(Policy);
9211 }
9212
9213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9214 Result =
9215 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9216 Load->getMemoryVT(), Load->getMemOperand());
9217 Chain = Result.getValue(1);
9218 }
9219 if (VT.isFixedLengthVector())
9220 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9221 return DAG.getMergeValues({Result, Chain}, DL);
9222 }
9223 case Intrinsic::riscv_seg2_load:
9224 case Intrinsic::riscv_seg3_load:
9225 case Intrinsic::riscv_seg4_load:
9226 case Intrinsic::riscv_seg5_load:
9227 case Intrinsic::riscv_seg6_load:
9228 case Intrinsic::riscv_seg7_load:
9229 case Intrinsic::riscv_seg8_load: {
9230 SDLoc DL(Op);
9231 static const Intrinsic::ID VlsegInts[7] = {
9232 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9233 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9234 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9235 Intrinsic::riscv_vlseg8};
9236 unsigned NF = Op->getNumValues() - 1;
9237 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9238 MVT XLenVT = Subtarget.getXLenVT();
9239 MVT VT = Op->getSimpleValueType(0);
9240 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9241
9242 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9243 Subtarget);
9244 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9245 auto *Load = cast<MemIntrinsicSDNode>(Op);
9246 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9247 ContainerVTs.push_back(MVT::Other);
9248 SDVTList VTs = DAG.getVTList(ContainerVTs);
9249 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9250 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9251 Ops.push_back(Op.getOperand(2));
9252 Ops.push_back(VL);
9253 SDValue Result =
9254 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9255 Load->getMemoryVT(), Load->getMemOperand());
9256 SmallVector<SDValue, 9> Results;
9257 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9258 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9259 DAG, Subtarget));
9260 Results.push_back(Result.getValue(NF));
9261 return DAG.getMergeValues(Results, DL);
9262 }
9263 case Intrinsic::riscv_sf_vc_v_x_se:
9265 case Intrinsic::riscv_sf_vc_v_i_se:
9267 case Intrinsic::riscv_sf_vc_v_xv_se:
9269 case Intrinsic::riscv_sf_vc_v_iv_se:
9271 case Intrinsic::riscv_sf_vc_v_vv_se:
9273 case Intrinsic::riscv_sf_vc_v_fv_se:
9275 case Intrinsic::riscv_sf_vc_v_xvv_se:
9277 case Intrinsic::riscv_sf_vc_v_ivv_se:
9279 case Intrinsic::riscv_sf_vc_v_vvv_se:
9281 case Intrinsic::riscv_sf_vc_v_fvv_se:
9283 case Intrinsic::riscv_sf_vc_v_xvw_se:
9285 case Intrinsic::riscv_sf_vc_v_ivw_se:
9287 case Intrinsic::riscv_sf_vc_v_vvw_se:
9289 case Intrinsic::riscv_sf_vc_v_fvw_se:
9291 }
9292
9293 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9294}
9295
9296SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9297 SelectionDAG &DAG) const {
9298 unsigned IntNo = Op.getConstantOperandVal(1);
9299 switch (IntNo) {
9300 default:
9301 break;
9302 case Intrinsic::riscv_masked_strided_store: {
9303 SDLoc DL(Op);
9304 MVT XLenVT = Subtarget.getXLenVT();
9305
9306 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9307 // the selection of the masked intrinsics doesn't do this for us.
9308 SDValue Mask = Op.getOperand(5);
9309 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9310
9311 SDValue Val = Op.getOperand(2);
9312 MVT VT = Val.getSimpleValueType();
9313 MVT ContainerVT = VT;
9314 if (VT.isFixedLengthVector()) {
9315 ContainerVT = getContainerForFixedLengthVector(VT);
9316 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9317 }
9318 if (!IsUnmasked) {
9319 MVT MaskVT = getMaskTypeFor(ContainerVT);
9320 if (VT.isFixedLengthVector())
9321 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9322 }
9323
9324 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9325
9326 SDValue IntID = DAG.getTargetConstant(
9327 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9328 XLenVT);
9329
9330 auto *Store = cast<MemIntrinsicSDNode>(Op);
9331 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9332 Ops.push_back(Val);
9333 Ops.push_back(Op.getOperand(3)); // Ptr
9334 Ops.push_back(Op.getOperand(4)); // Stride
9335 if (!IsUnmasked)
9336 Ops.push_back(Mask);
9337 Ops.push_back(VL);
9338
9339 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9340 Ops, Store->getMemoryVT(),
9341 Store->getMemOperand());
9342 }
9343 case Intrinsic::riscv_seg2_store:
9344 case Intrinsic::riscv_seg3_store:
9345 case Intrinsic::riscv_seg4_store:
9346 case Intrinsic::riscv_seg5_store:
9347 case Intrinsic::riscv_seg6_store:
9348 case Intrinsic::riscv_seg7_store:
9349 case Intrinsic::riscv_seg8_store: {
9350 SDLoc DL(Op);
9351 static const Intrinsic::ID VssegInts[] = {
9352 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9353 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9354 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9355 Intrinsic::riscv_vsseg8};
9356 // Operands are (chain, int_id, vec*, ptr, vl)
9357 unsigned NF = Op->getNumOperands() - 4;
9358 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9359 MVT XLenVT = Subtarget.getXLenVT();
9360 MVT VT = Op->getOperand(2).getSimpleValueType();
9361 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9362
9363 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9364 Subtarget);
9365 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9366 SDValue Ptr = Op->getOperand(NF + 2);
9367
9368 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9369 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9370 for (unsigned i = 0; i < NF; i++)
9371 Ops.push_back(convertToScalableVector(
9372 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9373 Ops.append({Ptr, VL});
9374
9375 return DAG.getMemIntrinsicNode(
9376 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9377 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9378 }
9379 case Intrinsic::riscv_sf_vc_xv_se:
9381 case Intrinsic::riscv_sf_vc_iv_se:
9383 case Intrinsic::riscv_sf_vc_vv_se:
9385 case Intrinsic::riscv_sf_vc_fv_se:
9387 case Intrinsic::riscv_sf_vc_xvv_se:
9389 case Intrinsic::riscv_sf_vc_ivv_se:
9391 case Intrinsic::riscv_sf_vc_vvv_se:
9393 case Intrinsic::riscv_sf_vc_fvv_se:
9395 case Intrinsic::riscv_sf_vc_xvw_se:
9397 case Intrinsic::riscv_sf_vc_ivw_se:
9399 case Intrinsic::riscv_sf_vc_vvw_se:
9401 case Intrinsic::riscv_sf_vc_fvw_se:
9403 }
9404
9405 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9406}
9407
9408static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9409 switch (ISDOpcode) {
9410 default:
9411 llvm_unreachable("Unhandled reduction");
9412 case ISD::VP_REDUCE_ADD:
9413 case ISD::VECREDUCE_ADD:
9414 return RISCVISD::VECREDUCE_ADD_VL;
9415 case ISD::VP_REDUCE_UMAX:
9416 case ISD::VECREDUCE_UMAX:
9417 return RISCVISD::VECREDUCE_UMAX_VL;
9418 case ISD::VP_REDUCE_SMAX:
9419 case ISD::VECREDUCE_SMAX:
9420 return RISCVISD::VECREDUCE_SMAX_VL;
9421 case ISD::VP_REDUCE_UMIN:
9422 case ISD::VECREDUCE_UMIN:
9423 return RISCVISD::VECREDUCE_UMIN_VL;
9424 case ISD::VP_REDUCE_SMIN:
9425 case ISD::VECREDUCE_SMIN:
9426 return RISCVISD::VECREDUCE_SMIN_VL;
9427 case ISD::VP_REDUCE_AND:
9428 case ISD::VECREDUCE_AND:
9429 return RISCVISD::VECREDUCE_AND_VL;
9430 case ISD::VP_REDUCE_OR:
9431 case ISD::VECREDUCE_OR:
9432 return RISCVISD::VECREDUCE_OR_VL;
9433 case ISD::VP_REDUCE_XOR:
9434 case ISD::VECREDUCE_XOR:
9435 return RISCVISD::VECREDUCE_XOR_VL;
9436 case ISD::VP_REDUCE_FADD:
9437 return RISCVISD::VECREDUCE_FADD_VL;
9438 case ISD::VP_REDUCE_SEQ_FADD:
9439 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9440 case ISD::VP_REDUCE_FMAX:
9441 return RISCVISD::VECREDUCE_FMAX_VL;
9442 case ISD::VP_REDUCE_FMIN:
9443 return RISCVISD::VECREDUCE_FMIN_VL;
9444 }
9445
9446}
9447
9448SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9449 SelectionDAG &DAG,
9450 bool IsVP) const {
9451 SDLoc DL(Op);
9452 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9453 MVT VecVT = Vec.getSimpleValueType();
9454 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9455 Op.getOpcode() == ISD::VECREDUCE_OR ||
9456 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9457 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9458 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9459 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9460 "Unexpected reduction lowering");
9461
9462 MVT XLenVT = Subtarget.getXLenVT();
9463
9464 MVT ContainerVT = VecVT;
9465 if (VecVT.isFixedLengthVector()) {
9466 ContainerVT = getContainerForFixedLengthVector(VecVT);
9467 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9468 }
9469
9470 SDValue Mask, VL;
9471 if (IsVP) {
9472 Mask = Op.getOperand(2);
9473 VL = Op.getOperand(3);
9474 } else {
9475 std::tie(Mask, VL) =
9476 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9477 }
9478
9479 unsigned BaseOpc;
9480 ISD::CondCode CC;
9481 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9482
9483 switch (Op.getOpcode()) {
9484 default:
9485 llvm_unreachable("Unhandled reduction");
9486 case ISD::VECREDUCE_AND:
9487 case ISD::VP_REDUCE_AND: {
9488 // vcpop ~x == 0
9489 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9490 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9491 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9492 CC = ISD::SETEQ;
9493 BaseOpc = ISD::AND;
9494 break;
9495 }
9496 case ISD::VECREDUCE_OR:
9497 case ISD::VP_REDUCE_OR:
9498 // vcpop x != 0
9499 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9500 CC = ISD::SETNE;
9501 BaseOpc = ISD::OR;
9502 break;
9503 case ISD::VECREDUCE_XOR:
9504 case ISD::VP_REDUCE_XOR: {
9505 // ((vcpop x) & 1) != 0
9506 SDValue One = DAG.getConstant(1, DL, XLenVT);
9507 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9508 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9509 CC = ISD::SETNE;
9510 BaseOpc = ISD::XOR;
9511 break;
9512 }
9513 }
9514
9515 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9516 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9517
9518 if (!IsVP)
9519 return SetCC;
9520
9521 // Now include the start value in the operation.
9522 // Note that we must return the start value when no elements are operated
9523 // upon. The vcpop instructions we've emitted in each case above will return
9524 // 0 for an inactive vector, and so we've already received the neutral value:
9525 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9526 // can simply include the start value.
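// E.g. vp.reduce.and of mask %m with start %s becomes roughly
// ((vcpop(vmnot(%m)) under the %mask/%evl) == 0) & %s, so an EVL of zero
// still yields %s.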
9527 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9528}
9529
9530static bool isNonZeroAVL(SDValue AVL) {
9531 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9532 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9533 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9534 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9535}
9536
9537/// Helper to lower a reduction sequence of the form:
9538/// scalar = reduce_op vec, scalar_start
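/// For an integer add reduction this conceptually becomes: insert the start
/// value into element 0 of an LMUL1 register, vredsum.vs that register
/// against the source vector, then extract element 0 of the result.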
9539static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9540 SDValue StartValue, SDValue Vec, SDValue Mask,
9541 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9542 const RISCVSubtarget &Subtarget) {
9543 const MVT VecVT = Vec.getSimpleValueType();
9544 const MVT M1VT = getLMUL1VT(VecVT);
9545 const MVT XLenVT = Subtarget.getXLenVT();
9546 const bool NonZeroAVL = isNonZeroAVL(VL);
9547
9548 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9549 // or the original VT if fractional.
9550 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9551 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9552 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9553 // be the result of the reduction operation.
9554 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9555 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9556 DAG, Subtarget);
9557 if (M1VT != InnerVT)
9558 InitialValue =
9559 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9560 InitialValue, DAG.getVectorIdxConstant(0, DL));
9561 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9562 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9563 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9564 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9565 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9566 DAG.getVectorIdxConstant(0, DL));
9567}
9568
9569SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9570 SelectionDAG &DAG) const {
9571 SDLoc DL(Op);
9572 SDValue Vec = Op.getOperand(0);
9573 EVT VecEVT = Vec.getValueType();
9574
9575 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9576
9577 // Due to ordering in legalize types we may have a vector type that needs to
9578 // be split. Do that manually so we can get down to a legal type.
9579 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9580 TargetLowering::TypeSplitVector) {
9581 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9582 VecEVT = Lo.getValueType();
9583 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9584 }
9585
9586 // TODO: The type may need to be widened rather than split. Or widened before
9587 // it can be split.
9588 if (!isTypeLegal(VecEVT))
9589 return SDValue();
9590
9591 MVT VecVT = VecEVT.getSimpleVT();
9592 MVT VecEltVT = VecVT.getVectorElementType();
9593 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VecVT);
9598 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9599 }
9600
9601 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9602
9603 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9604 switch (BaseOpc) {
9605 case ISD::AND:
9606 case ISD::OR:
9607 case ISD::UMAX:
9608 case ISD::UMIN:
9609 case ISD::SMAX:
9610 case ISD::SMIN:
9611 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9612 DAG.getVectorIdxConstant(0, DL));
9613 }
9614 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9615 Mask, VL, DL, DAG, Subtarget);
9616}
9617
9618// Given a reduction op, this function returns the matching reduction opcode,
9619// the vector SDValue and the scalar SDValue required to lower this to a
9620// RISCVISD node.
9621static std::tuple<unsigned, SDValue, SDValue>
9623 const RISCVSubtarget &Subtarget) {
9624 SDLoc DL(Op);
9625 auto Flags = Op->getFlags();
9626 unsigned Opcode = Op.getOpcode();
9627 switch (Opcode) {
9628 default:
9629 llvm_unreachable("Unhandled reduction");
9630 case ISD::VECREDUCE_FADD: {
9631 // Use positive zero if we can. It is cheaper to materialize.
9632 SDValue Zero =
9633 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9634 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9635 }
9636 case ISD::VECREDUCE_SEQ_FADD:
9637 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9638 Op.getOperand(0));
9639 case ISD::VECREDUCE_FMINIMUM:
9640 case ISD::VECREDUCE_FMAXIMUM:
9641 case ISD::VECREDUCE_FMIN:
9642 case ISD::VECREDUCE_FMAX: {
9643 SDValue Front =
9644 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9645 DAG.getVectorIdxConstant(0, DL));
9646 unsigned RVVOpc =
9647 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9648 ? RISCVISD::VECREDUCE_FMIN_VL
9649 : RISCVISD::VECREDUCE_FMAX_VL;
9650 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9651 }
9652 }
9653}
9654
9655SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDLoc DL(Op);
9658 MVT VecEltVT = Op.getSimpleValueType();
9659
9660 unsigned RVVOpcode;
9661 SDValue VectorVal, ScalarVal;
9662 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9663 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9664 MVT VecVT = VectorVal.getSimpleValueType();
9665
9666 MVT ContainerVT = VecVT;
9667 if (VecVT.isFixedLengthVector()) {
9668 ContainerVT = getContainerForFixedLengthVector(VecVT);
9669 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9670 }
9671
9672 MVT ResVT = Op.getSimpleValueType();
9673 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9674 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9675 VL, DL, DAG, Subtarget);
9676 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9677 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9678 return Res;
9679
9680 if (Op->getFlags().hasNoNaNs())
9681 return Res;
9682
9683 // Force the output to NaN if any element is NaN.
9684 SDValue IsNan =
9685 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9686 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9687 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9688 MVT XLenVT = Subtarget.getXLenVT();
9689 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9690 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9691 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9692 return DAG.getSelect(
9693 DL, ResVT, NoNaNs, Res,
9694 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9695 ResVT));
9696}
9697
9698SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9699 SelectionDAG &DAG) const {
9700 SDLoc DL(Op);
9701 SDValue Vec = Op.getOperand(1);
9702 EVT VecEVT = Vec.getValueType();
9703
9704 // TODO: The type may need to be widened rather than split. Or widened before
9705 // it can be split.
9706 if (!isTypeLegal(VecEVT))
9707 return SDValue();
9708
9709 MVT VecVT = VecEVT.getSimpleVT();
9710 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9711
9712 if (VecVT.isFixedLengthVector()) {
9713 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9714 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9715 }
9716
9717 SDValue VL = Op.getOperand(3);
9718 SDValue Mask = Op.getOperand(2);
9719 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9720 Vec, Mask, VL, DL, DAG, Subtarget);
9721}
9722
9723SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDValue Vec = Op.getOperand(0);
9726 SDValue SubVec = Op.getOperand(1);
9727 MVT VecVT = Vec.getSimpleValueType();
9728 MVT SubVecVT = SubVec.getSimpleValueType();
9729
9730 SDLoc DL(Op);
9731 MVT XLenVT = Subtarget.getXLenVT();
9732 unsigned OrigIdx = Op.getConstantOperandVal(2);
9733 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9734
9735 // We don't have the ability to slide mask vectors up indexed by their i1
9736 // elements; the smallest we can do is i8. Often we are able to bitcast to
9737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9738 // into a scalable one, we might not necessarily have enough scalable
9739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
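// For example, inserting a v16i1 subvector at index 8 into an nxv32i1 vector
// can instead be done as inserting a v2i8 subvector at index 1 into an nxv4i8
// vector, since the index and both element counts are divisible by 8.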
9740 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9741 (OrigIdx != 0 || !Vec.isUndef())) {
9742 if (VecVT.getVectorMinNumElements() >= 8 &&
9743 SubVecVT.getVectorMinNumElements() >= 8) {
9744 assert(OrigIdx % 8 == 0 && "Invalid index");
9745 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9746 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9747 "Unexpected mask vector lowering");
9748 OrigIdx /= 8;
9749 SubVecVT =
9750 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9751 SubVecVT.isScalableVector());
9752 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9753 VecVT.isScalableVector());
9754 Vec = DAG.getBitcast(VecVT, Vec);
9755 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9756 } else {
9757 // We can't slide this mask vector up indexed by its i1 elements.
9758 // This poses a problem when we wish to insert a scalable vector which
9759 // can't be re-expressed as a larger type. Just choose the slow path and
9760 // extend to a larger type, then truncate back down.
9761 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9762 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9763 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9764 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9765 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9766 Op.getOperand(2));
9767 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9768 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9769 }
9770 }
9771
9772 // If the subvector is a fixed-length type, we cannot use subregister
9773 // manipulation to simplify the codegen; we don't know which register of an
9774 // LMUL group contains the specific subvector as we only know the minimum
9775 // register size. Therefore we must slide the vector group up the full
9776 // amount.
9777 if (SubVecVT.isFixedLengthVector()) {
9778 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9779 return Op;
9780 MVT ContainerVT = VecVT;
9781 if (VecVT.isFixedLengthVector()) {
9782 ContainerVT = getContainerForFixedLengthVector(VecVT);
9783 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9784 }
9785
9786 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9787 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9788 DAG.getUNDEF(ContainerVT), SubVec,
9789 DAG.getVectorIdxConstant(0, DL));
9790 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9791 return DAG.getBitcast(Op.getValueType(), SubVec);
9792 }
9793
9794 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9795 DAG.getUNDEF(ContainerVT), SubVec,
9796 DAG.getVectorIdxConstant(0, DL));
9797 SDValue Mask =
9798 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9799 // Set the vector length to only the number of elements we care about. Note
9800 // that for slideup this includes the offset.
9801 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9802 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9803
9804 // Use tail agnostic policy if we're inserting over Vec's tail.
9805 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9806 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9807 Policy = RISCVII::TAIL_AGNOSTIC;
9808
9809 // If we're inserting into the lowest elements, use a tail undisturbed
9810 // vmv.v.v.
9811 if (OrigIdx == 0) {
9812 SubVec =
9813 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9814 } else {
9815 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9816 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9817 SlideupAmt, Mask, VL, Policy);
9818 }
9819
9820 if (VecVT.isFixedLengthVector())
9821 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9822 return DAG.getBitcast(Op.getValueType(), SubVec);
9823 }
9824
9825 unsigned SubRegIdx, RemIdx;
9826 std::tie(SubRegIdx, RemIdx) =
9827 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9828 VecVT, SubVecVT, OrigIdx, TRI);
9829
9830 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9831 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9832 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9833 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9834
9835 // 1. If the Idx has been completely eliminated and this subvector's size is
9836 // a vector register or a multiple thereof, or the surrounding elements are
9837 // undef, then this is a subvector insert which naturally aligns to a vector
9838 // register. These can easily be handled using subregister manipulation.
9839 // 2. If the subvector is smaller than a vector register, then the insertion
9840 // must preserve the undisturbed elements of the register. We do this by
9841 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9842 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9843 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9844 // LMUL=1 type back into the larger vector (resolving to another subregister
9845 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9846 // to avoid allocating a large register group to hold our subvector.
9847 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9848 return Op;
9849
9850 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9851 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
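// For example, with RemIdx == 1 when inserting an nxv1i32 subvector into an
// nxv2i32 register, the slideup below uses OFFSET = vscale x 1 and
// VL = vscale x 2, leaving the first vscale elements of the destination
// undisturbed.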
9855 MVT InterSubVT = VecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx;
9858 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9859 InterSubVT = getLMUL1VT(VecVT);
9860 // Extract a subvector equal to the nearest full vector register type. This
9861 // should resolve to a EXTRACT_SUBREG instruction.
9862 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9863 DAG.getVectorIdxConstant(AlignedIdx, DL));
9864 }
9865
9866 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9867 DAG.getUNDEF(InterSubVT), SubVec,
9868 DAG.getVectorIdxConstant(0, DL));
9869
9870 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9871
9872 ElementCount EndIndex =
9873 ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9874 VL = computeVLMax(SubVecVT, DL, DAG);
9875
9876 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9877 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9878 if (EndIndex == InterSubVT.getVectorElementCount())
9879 Policy = RISCVII::TAIL_AGNOSTIC;
9880
9881 // If we're inserting into the lowest elements, use a tail undisturbed
9882 // vmv.v.v.
9883 if (RemIdx == 0) {
9884 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9885 SubVec, VL);
9886 } else {
9887 SDValue SlideupAmt =
9888 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9889
9890 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9891 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9892
9893 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9894 SlideupAmt, Mask, VL, Policy);
9895 }
9896
9897 // If required, insert this subvector back into the correct vector register.
9898 // This should resolve to an INSERT_SUBREG instruction.
9899 if (VecVT.bitsGT(InterSubVT))
9900 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9901 DAG.getVectorIdxConstant(AlignedIdx, DL));
9902
9903 // We might have bitcast from a mask type: cast back to the original type if
9904 // required.
9905 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9906}
9907
9908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9909 SelectionDAG &DAG) const {
9910 SDValue Vec = Op.getOperand(0);
9911 MVT SubVecVT = Op.getSimpleValueType();
9912 MVT VecVT = Vec.getSimpleValueType();
9913
9914 SDLoc DL(Op);
9915 MVT XLenVT = Subtarget.getXLenVT();
9916 unsigned OrigIdx = Op.getConstantOperandVal(1);
9917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9918
9919 // We don't have the ability to slide mask vectors down indexed by their i1
9920 // elements; the smallest we can do is i8. Often we are able to bitcast to
9921 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9922 // from a scalable one, we might not necessarily have enough scalable
9923 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
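// For example, extracting a v8i1 subvector at index 16 from an nxv32i1 vector
// can instead be done as extracting a v1i8 subvector at index 2 from an nxv4i8
// vector.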
9924 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9925 if (VecVT.getVectorMinNumElements() >= 8 &&
9926 SubVecVT.getVectorMinNumElements() >= 8) {
9927 assert(OrigIdx % 8 == 0 && "Invalid index");
9928 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9929 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9930 "Unexpected mask vector lowering");
9931 OrigIdx /= 8;
9932 SubVecVT =
9933 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9934 SubVecVT.isScalableVector());
9935 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9936 VecVT.isScalableVector());
9937 Vec = DAG.getBitcast(VecVT, Vec);
9938 } else {
9939 // We can't slide this mask vector down, indexed by its i1 elements.
9940 // This poses a problem when we wish to extract a scalable vector which
9941 // can't be re-expressed as a larger type. Just choose the slow path and
9942 // extend to a larger type, then truncate back down.
9943 // TODO: We could probably improve this when extracting certain fixed-length
9944 // vectors from other fixed-length vectors, where we can extract as i8 and
9945 // shift the correct element right to reach the desired subvector.
9946 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9947 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9948 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9949 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9950 Op.getOperand(1));
9951 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9952 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9953 }
9954 }
9955
9956 // With an index of 0 this is a cast-like subvector, which can be performed
9957 // with subregister operations.
9958 if (OrigIdx == 0)
9959 return Op;
9960
9961 const auto VLen = Subtarget.getRealVLen();
9962
9963 // If the subvector is a fixed-length type and we don't know VLEN
9964 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9965 // don't know which register of an LMUL group contains the specific subvector
9966 // as we only know the minimum register size. Therefore we must slide the
9967 // vector group down the full amount.
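// For example, extracting v2i32 at index 6 from a v8i32 source without a known
// VLEN slides the container down by 6 with VL = 2 and then takes the subvector
// starting at element 0.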
9968 if (SubVecVT.isFixedLengthVector() && !VLen) {
9969 MVT ContainerVT = VecVT;
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VecVT);
9972 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9973 }
9974
9975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9977 if (auto ShrunkVT =
9978 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9979 ContainerVT = *ShrunkVT;
9980 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9981 DAG.getVectorIdxConstant(0, DL));
9982 }
9983
9984 SDValue Mask =
9985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9986 // Set the vector length to only the number of elements we care about. This
9987 // avoids sliding down elements we're going to discard straight away.
9988 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9989 Subtarget);
9990 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9991 SDValue Slidedown =
9992 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9993 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9994 // Now we can use a cast-like subvector extract to get the result.
9995 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9996 DAG.getVectorIdxConstant(0, DL));
9997 return DAG.getBitcast(Op.getValueType(), Slidedown);
9998 }
9999
10000 if (VecVT.isFixedLengthVector()) {
10001 VecVT = getContainerForFixedLengthVector(VecVT);
10002 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10003 }
10004
10005 MVT ContainerSubVecVT = SubVecVT;
10006 if (SubVecVT.isFixedLengthVector())
10007 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10008
10009 unsigned SubRegIdx;
10010 ElementCount RemIdx;
10011 // extract_subvector scales the index by vscale if the subvector is scalable,
10012 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10013 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10014 if (SubVecVT.isFixedLengthVector()) {
10015 assert(VLen);
10016 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10017 auto Decompose =
10018 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10019 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10020 SubRegIdx = Decompose.first;
10021 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10022 (OrigIdx % Vscale));
10023 } else {
10024 auto Decompose =
10025 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getScalable(Decompose.second);
10029 }
10030
10031 // If the Idx has been completely eliminated then this is a subvector extract
10032 // which naturally aligns to a vector register. These can easily be handled
10033 // using subregister manipulation.
10034 if (RemIdx.isZero()) {
10035 if (SubVecVT.isFixedLengthVector()) {
10036 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10037 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10038 }
10039 return Op;
10040 }
10041
10042 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10043 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10044 // divide exactly.
10045 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10046 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10047
10048 // If the vector type is an LMUL-group type, extract a subvector equal to the
10049 // nearest full vector register type.
10050 MVT InterSubVT = VecVT;
10051 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10052 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10053 // we should have successfully decomposed the extract into a subregister.
10054 assert(SubRegIdx != RISCV::NoSubRegister);
10055 InterSubVT = getLMUL1VT(VecVT);
10056 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10057 }
10058
10059 // Slide this vector register down by the desired number of elements in order
10060 // to place the desired subvector starting at element 0.
10061 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10062 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10063 if (SubVecVT.isFixedLengthVector())
10064 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10065 Subtarget);
10066 SDValue Slidedown =
10067 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10068 Vec, SlidedownAmt, Mask, VL);
10069
10070 // Now the vector is in the right position, extract our final subvector. This
10071 // should resolve to a COPY.
10072 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10073 DAG.getVectorIdxConstant(0, DL));
10074
10075 // We might have bitcast from a mask type: cast back to the original type if
10076 // required.
10077 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10078}
10079
10080// Widen a vector's operands to i8, then truncate its results back to the
10081// original type, typically i1. All operand and result types must be the same.
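// For example, an i1-element vector_deinterleave zero-extends both nxv2i1
// operands to nxv2i8, deinterleaves at i8, and then compares each i8 result
// against zero with SETNE to recover the i1 results.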
10082 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10083 SelectionDAG &DAG) {
10084 MVT VT = N.getSimpleValueType();
10085 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10086 SmallVector<SDValue, 4> WideOps;
10087 for (SDValue Op : N->ops()) {
10088 assert(Op.getSimpleValueType() == VT &&
10089 "Operands and result must be same type");
10090 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10091 }
10092
10093 unsigned NumVals = N->getNumValues();
10094
10095 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10096 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10097 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10098 SmallVector<SDValue, 4> TruncVals;
10099 for (unsigned I = 0; I < NumVals; I++) {
10100 TruncVals.push_back(
10101 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10102 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10103 }
10104
10105 if (TruncVals.size() > 1)
10106 return DAG.getMergeValues(TruncVals, DL);
10107 return TruncVals.front();
10108}
10109
10110SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10111 SelectionDAG &DAG) const {
10112 SDLoc DL(Op);
10113 MVT VecVT = Op.getSimpleValueType();
10114
10115 assert(VecVT.isScalableVector() &&
10116 "vector_interleave on non-scalable vector!");
10117
10118 // 1 bit element vectors need to be widened to e8
10119 if (VecVT.getVectorElementType() == MVT::i1)
10120 return widenVectorOpsToi8(Op, DL, DAG);
10121
10122 // If the VT is LMUL=8, we need to split and reassemble.
10123 if (VecVT.getSizeInBits().getKnownMinValue() ==
10124 (8 * RISCV::RVVBitsPerBlock)) {
10125 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10126 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10127 EVT SplitVT = Op0Lo.getValueType();
10128
10129 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10130 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10131 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10132 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10133
10134 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10135 ResLo.getValue(0), ResHi.getValue(0));
10136 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10137 ResHi.getValue(1));
10138 return DAG.getMergeValues({Even, Odd}, DL);
10139 }
10140
10141 // Concatenate the two vectors as one vector to deinterleave
10142 MVT ConcatVT =
10143 MVT::getVectorVT(VecVT.getVectorElementType(),
10144 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10145 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10146 Op.getOperand(0), Op.getOperand(1));
10147
10148 // We want to operate on all lanes, so get the default mask and VL for it
10149 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10150 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10151
10152 // We can deinterleave through vnsrl.wi if the element type is smaller than
10153 // ELEN
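// getDeinterleaveViaVNSRL views the concatenated vector at twice the element
// width, so each wide element packs an (even, odd) pair; a vnsrl shift of 0
// keeps the even (low) halves and a shift of SEW keeps the odd (high) halves.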
10154 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10155 SDValue Even =
10156 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10157 SDValue Odd =
10158 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10159 return DAG.getMergeValues({Even, Odd}, DL);
10160 }
10161
10162 // For the indices, use the same SEW to avoid an extra vsetvli
10163 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10164 // Create a vector of even indices {0, 2, 4, ...}
10165 SDValue EvenIdx =
10166 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10167 // Create a vector of odd indices {1, 3, 5, ... }
10168 SDValue OddIdx =
10169 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10170
10171 // Gather the even and odd elements into two separate vectors
10172 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10173 Concat, EvenIdx, Passthru, Mask, VL);
10174 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10175 Concat, OddIdx, Passthru, Mask, VL);
10176
10177 // Extract the result half of the gather for even and odd
10178 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10179 DAG.getVectorIdxConstant(0, DL));
10180 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10181 DAG.getVectorIdxConstant(0, DL));
10182
10183 return DAG.getMergeValues({Even, Odd}, DL);
10184}
10185
10186SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10187 SelectionDAG &DAG) const {
10188 SDLoc DL(Op);
10189 MVT VecVT = Op.getSimpleValueType();
10190
10191 assert(VecVT.isScalableVector() &&
10192 "vector_interleave on non-scalable vector!");
10193
10194 // i1 vectors need to be widened to i8
10195 if (VecVT.getVectorElementType() == MVT::i1)
10196 return widenVectorOpsToi8(Op, DL, DAG);
10197
10198 MVT XLenVT = Subtarget.getXLenVT();
10199 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10200
10201 // If the VT is LMUL=8, we need to split and reassemble.
10202 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10203 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10204 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10205 EVT SplitVT = Op0Lo.getValueType();
10206
10207 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10208 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10209 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10210 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10211
10212 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10213 ResLo.getValue(0), ResLo.getValue(1));
10214 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10215 ResHi.getValue(0), ResHi.getValue(1));
10216 return DAG.getMergeValues({Lo, Hi}, DL);
10217 }
10218
10219 SDValue Interleaved;
10220
10221 // If the element type is smaller than ELEN, then we can interleave with
10222 // vwaddu.vv and vwmaccu.vx
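// The widening path effectively computes
// zext(a) + zext(b) + zext(b) * (2^SEW - 1) = zext(a) + (zext(b) << SEW),
// so each 2*SEW result element holds a[i] in its low half and b[i] in its high
// half, i.e. the interleaved pair when viewed at the original SEW.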
10223 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10224 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10225 DAG, Subtarget);
10226 } else {
10227 // Otherwise, fall back to using vrgatherei16.vv
10228 MVT ConcatVT =
10229 MVT::getVectorVT(VecVT.getVectorElementType(),
10230 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10231 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10232 Op.getOperand(0), Op.getOperand(1));
10233
10234 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10235
10236 // 0 1 2 3 4 5 6 7 ...
10237 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10238
10239 // 1 1 1 1 1 1 1 1 ...
10240 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10241
10242 // 1 0 1 0 1 0 1 0 ...
10243 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10244 OddMask = DAG.getSetCC(
10245 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10246 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10247 ISD::CondCode::SETNE);
10248
10249 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10250
10251 // Build up the index vector for interleaving the concatenated vector
10252 // 0 0 1 1 2 2 3 3 ...
10253 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10254 // 0 n 1 n+1 2 n+2 3 n+3 ...
10255 Idx =
10256 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10257
10258 // Then perform the interleave
10259 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10260 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10261 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10262 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10263 }
10264
10265 // Extract the two halves from the interleaved result
10266 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10267 DAG.getVectorIdxConstant(0, DL));
10268 SDValue Hi = DAG.getNode(
10269 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10270 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10271
10272 return DAG.getMergeValues({Lo, Hi}, DL);
10273}
10274
10275// Lower step_vector to the vid instruction. Any non-identity step value must
10276 // be accounted for by manual expansion.
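// For example, a step_vector with step 4 lowers to vid.v followed by a shift
// left by 2, while a step of 3 lowers to vid.v followed by a multiply by a
// splat of 3.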
10277SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10278 SelectionDAG &DAG) const {
10279 SDLoc DL(Op);
10280 MVT VT = Op.getSimpleValueType();
10281 assert(VT.isScalableVector() && "Expected scalable vector");
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10284 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10285 uint64_t StepValImm = Op.getConstantOperandVal(0);
10286 if (StepValImm != 1) {
10287 if (isPowerOf2_64(StepValImm)) {
10288 SDValue StepVal =
10289 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10290 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10291 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10292 } else {
10293 SDValue StepVal = lowerScalarSplat(
10294 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10295 VL, VT, DL, DAG, Subtarget);
10296 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10297 }
10298 }
10299 return StepVec;
10300}
10301
10302// Implement vector_reverse using vrgather.vv with indices determined by
10303// subtracting the id of each element from (VLMAX-1). This will convert
10304// the indices like so:
10305// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10306// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10307SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 MVT VecVT = Op.getSimpleValueType();
10311 if (VecVT.getVectorElementType() == MVT::i1) {
10312 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10313 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10314 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10315 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10316 }
10317 unsigned EltSize = VecVT.getScalarSizeInBits();
10318 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10319 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10320 unsigned MaxVLMAX =
10321 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10322
10323 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10324 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10325
10326 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10327 // to use vrgatherei16.vv.
10328 // TODO: It's also possible to use vrgatherei16.vv for other types to
10329 // decrease register width for the index calculation.
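// For example, an nxv32i8 (LMUL=4) gather with a maximum VLEN of 1024 can have
// VLMAX up to 512, so indices up to 511 do not fit in an i8 element and the
// index type is promoted to i16 for vrgatherei16.vv.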
10330 if (MaxVLMAX > 256 && EltSize == 8) {
10331 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 // Reverse each half, then reassemble them in reverse order.
10333 // NOTE: It's also possible that, after splitting, VLMAX no longer
10334 // requires vrgatherei16.vv.
10335 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10336 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10337 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10338 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10339 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10340 // Reassemble the low and high pieces reversed.
10341 // FIXME: This is a CONCAT_VECTORS.
10342 SDValue Res =
10343 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10344 DAG.getVectorIdxConstant(0, DL));
10345 return DAG.getNode(
10346 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10347 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10348 }
10349
10350 // Just promote the int type to i16 which will double the LMUL.
10351 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10353 }
10354
10355 MVT XLenVT = Subtarget.getXLenVT();
10356 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10357
10358 // Calculate VLMAX-1 for the desired SEW.
10359 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10360 computeVLMax(VecVT, DL, DAG),
10361 DAG.getConstant(1, DL, XLenVT));
10362
10363 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10364 bool IsRV32E64 =
10365 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10366 SDValue SplatVL;
10367 if (!IsRV32E64)
10368 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10369 else
10370 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10371 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10372
10373 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10374 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10375 DAG.getUNDEF(IntVT), Mask, VL);
10376
10377 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10378 DAG.getUNDEF(VecVT), Mask, VL);
10379}
10380
10381SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 SDLoc DL(Op);
10384 SDValue V1 = Op.getOperand(0);
10385 SDValue V2 = Op.getOperand(1);
10386 MVT XLenVT = Subtarget.getXLenVT();
10387 MVT VecVT = Op.getSimpleValueType();
10388
10389 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10390
10391 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10392 SDValue DownOffset, UpOffset;
10393 if (ImmValue >= 0) {
10394 // The operand is a TargetConstant, we need to rebuild it as a regular
10395 // constant.
10396 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10397 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10398 } else {
10399 // The operand is a TargetConstant, we need to rebuild it as a regular
10400 // constant rather than negating the original operand.
10401 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10402 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10403 }
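// For example, a splice of V1 and V2 with offset 2 and VLMAX = 8 slides V1
// down by DownOffset = 2 with VL = UpOffset = 6, then slides V2 up by 6,
// producing { V1[2..7], V2[0..1] }.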
10404
10405 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10406
10407 SDValue SlideDown =
10408 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10409 DownOffset, TrueMask, UpOffset);
10410 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10411 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10412 RISCVII::TAIL_AGNOSTIC);
10413}
10414
10415SDValue
10416RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10417 SelectionDAG &DAG) const {
10418 SDLoc DL(Op);
10419 auto *Load = cast<LoadSDNode>(Op);
10420
10421 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10422 Load->getMemoryVT(),
10423 *Load->getMemOperand()) &&
10424 "Expecting a correctly-aligned load");
10425
10426 MVT VT = Op.getSimpleValueType();
10427 MVT XLenVT = Subtarget.getXLenVT();
10428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10429
10430 // If we know the exact VLEN and our fixed length vector completely fills
10431 // the container, use a whole register load instead.
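// For example, with a known VLEN of 128, a v4i32 value exactly fills its
// nxv2i32 container (MinVLMAX = MaxVLMAX = 4), so a plain whole-register load
// of the container type is used instead of a VL-limited vle intrinsic.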
10432 const auto [MinVLMAX, MaxVLMAX] =
10433 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10434 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10435 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10436 MachineMemOperand *MMO = Load->getMemOperand();
10437 SDValue NewLoad =
10438 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10439 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10440 MMO->getAAInfo(), MMO->getRanges());
10441 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10442 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10443 }
10444
10445 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10446
10447 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10448 SDValue IntID = DAG.getTargetConstant(
10449 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10450 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10451 if (!IsMaskOp)
10452 Ops.push_back(DAG.getUNDEF(ContainerVT));
10453 Ops.push_back(Load->getBasePtr());
10454 Ops.push_back(VL);
10455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10456 SDValue NewLoad =
10457 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10458 Load->getMemoryVT(), Load->getMemOperand());
10459
10460 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10461 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10462}
10463
10464SDValue
10465RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10466 SelectionDAG &DAG) const {
10467 SDLoc DL(Op);
10468 auto *Store = cast<StoreSDNode>(Op);
10469
10470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10471 Store->getMemoryVT(),
10472 *Store->getMemOperand()) &&
10473 "Expecting a correctly-aligned store");
10474
10475 SDValue StoreVal = Store->getValue();
10476 MVT VT = StoreVal.getSimpleValueType();
10477 MVT XLenVT = Subtarget.getXLenVT();
10478
10479 // If the size is less than a byte, we need to pad with zeros to make a byte.
10480 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10481 VT = MVT::v8i1;
10482 StoreVal =
10483 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10484 StoreVal, DAG.getVectorIdxConstant(0, DL));
10485 }
10486
10487 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10488
10489 SDValue NewValue =
10490 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10491
10492
10493 // If we know the exact VLEN and our fixed length vector completely fills
10494 // the container, use a whole register store instead.
10495 const auto [MinVLMAX, MaxVLMAX] =
10496 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10497 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10498 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10499 MachineMemOperand *MMO = Store->getMemOperand();
10500 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10501 MMO->getPointerInfo(), MMO->getBaseAlign(),
10502 MMO->getFlags(), MMO->getAAInfo());
10503 }
10504
10505 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10506 Subtarget);
10507
10508 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10509 SDValue IntID = DAG.getTargetConstant(
10510 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10511 return DAG.getMemIntrinsicNode(
10512 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10513 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10514 Store->getMemoryVT(), Store->getMemOperand());
10515}
10516
10517SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10518 SelectionDAG &DAG) const {
10519 SDLoc DL(Op);
10520 MVT VT = Op.getSimpleValueType();
10521
10522 const auto *MemSD = cast<MemSDNode>(Op);
10523 EVT MemVT = MemSD->getMemoryVT();
10524 MachineMemOperand *MMO = MemSD->getMemOperand();
10525 SDValue Chain = MemSD->getChain();
10526 SDValue BasePtr = MemSD->getBasePtr();
10527
10528 SDValue Mask, PassThru, VL;
10529 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10530 Mask = VPLoad->getMask();
10531 PassThru = DAG.getUNDEF(VT);
10532 VL = VPLoad->getVectorLength();
10533 } else {
10534 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10535 Mask = MLoad->getMask();
10536 PassThru = MLoad->getPassThru();
10537 }
10538
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10540
10541 MVT XLenVT = Subtarget.getXLenVT();
10542
10543 MVT ContainerVT = VT;
10544 if (VT.isFixedLengthVector()) {
10545 ContainerVT = getContainerForFixedLengthVector(VT);
10546 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10547 if (!IsUnmasked) {
10548 MVT MaskVT = getMaskTypeFor(ContainerVT);
10549 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10550 }
10551 }
10552
10553 if (!VL)
10554 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10555
10556 unsigned IntID =
10557 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10558 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10559 if (IsUnmasked)
10560 Ops.push_back(DAG.getUNDEF(ContainerVT));
10561 else
10562 Ops.push_back(PassThru);
10563 Ops.push_back(BasePtr);
10564 if (!IsUnmasked)
10565 Ops.push_back(Mask);
10566 Ops.push_back(VL);
10567 if (!IsUnmasked)
10568 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10569
10570 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10571
10572 SDValue Result =
10573 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10574 Chain = Result.getValue(1);
10575
10576 if (VT.isFixedLengthVector())
10577 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10578
10579 return DAG.getMergeValues({Result, Chain}, DL);
10580}
10581
10582SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(Op);
10585
10586 const auto *MemSD = cast<MemSDNode>(Op);
10587 EVT MemVT = MemSD->getMemoryVT();
10588 MachineMemOperand *MMO = MemSD->getMemOperand();
10589 SDValue Chain = MemSD->getChain();
10590 SDValue BasePtr = MemSD->getBasePtr();
10591 SDValue Val, Mask, VL;
10592
10593 bool IsCompressingStore = false;
10594 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10595 Val = VPStore->getValue();
10596 Mask = VPStore->getMask();
10597 VL = VPStore->getVectorLength();
10598 } else {
10599 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10600 Val = MStore->getValue();
10601 Mask = MStore->getMask();
10602 IsCompressingStore = MStore->isCompressingStore();
10603 }
10604
10605 bool IsUnmasked =
10606 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10607
10608 MVT VT = Val.getSimpleValueType();
10609 MVT XLenVT = Subtarget.getXLenVT();
10610
10611 MVT ContainerVT = VT;
10612 if (VT.isFixedLengthVector()) {
10613 ContainerVT = getContainerForFixedLengthVector(VT);
10614
10615 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10616 if (!IsUnmasked || IsCompressingStore) {
10617 MVT MaskVT = getMaskTypeFor(ContainerVT);
10618 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10619 }
10620 }
10621
10622 if (!VL)
10623 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10624
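// For a compressing store, pack the active elements to the front with
// vcompress and reduce VL to vcpop(mask); the store itself is then emitted
// unmasked.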
10625 if (IsCompressingStore) {
10626 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10628 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10629 VL =
10630 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10631 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10632 }
10633
10634 unsigned IntID =
10635 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10636 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10637 Ops.push_back(Val);
10638 Ops.push_back(BasePtr);
10639 if (!IsUnmasked)
10640 Ops.push_back(Mask);
10641 Ops.push_back(VL);
10642
10643 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10644 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10645}
10646
10647SDValue
10648RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10649 SelectionDAG &DAG) const {
10650 MVT InVT = Op.getOperand(0).getSimpleValueType();
10651 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10652
10653 MVT VT = Op.getSimpleValueType();
10654
10655 SDValue Op1 =
10656 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10657 SDValue Op2 =
10658 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10659
10660 SDLoc DL(Op);
10661 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10662 DAG, Subtarget);
10663 MVT MaskVT = getMaskTypeFor(ContainerVT);
10664
10665 SDValue Cmp =
10666 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10667 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10668
10669 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10670}
10671
10672SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10673 SelectionDAG &DAG) const {
10674 unsigned Opc = Op.getOpcode();
10675 SDLoc DL(Op);
10676 SDValue Chain = Op.getOperand(0);
10677 SDValue Op1 = Op.getOperand(1);
10678 SDValue Op2 = Op.getOperand(2);
10679 SDValue CC = Op.getOperand(3);
10680 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10681 MVT VT = Op.getSimpleValueType();
10682 MVT InVT = Op1.getSimpleValueType();
10683
10684 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10685 // condition code.
10686 if (Opc == ISD::STRICT_FSETCCS) {
10687 // Expand strict_fsetccs(x, y, oeq) to
10688 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10689 SDVTList VTList = Op->getVTList();
10690 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10691 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10692 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10693 Op2, OLECCVal);
10694 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10695 Op1, OLECCVal);
10696 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10697 Tmp1.getValue(1), Tmp2.getValue(1));
10698 // Tmp1 and Tmp2 might be the same node.
10699 if (Tmp1 != Tmp2)
10700 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10701 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10702 }
10703
10704 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10705 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10706 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10707 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10708 Op2, OEQCCVal);
10709 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10710 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10711 }
10712 }
10713
10714 MVT ContainerInVT = InVT;
10715 if (InVT.isFixedLengthVector()) {
10716 ContainerInVT = getContainerForFixedLengthVector(InVT);
10717 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10718 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10719 }
10720 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10721
10722 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10723
10724 SDValue Res;
10725 if (Opc == ISD::STRICT_FSETCC &&
10726 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10727 CCVal == ISD::SETOLE)) {
10728 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10729 // is only active when both input elements are ordered.
10730 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10731 SDValue OrderMask1 = DAG.getNode(
10732 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10733 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10734 True, VL});
10735 SDValue OrderMask2 = DAG.getNode(
10736 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10737 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10738 True, VL});
10739 Mask =
10740 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10741 // Use Mask as the merge operand to let the result be 0 if either of the
10742 // inputs is unordered.
10743 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10744 DAG.getVTList(MaskVT, MVT::Other),
10745 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10746 } else {
10747 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10748 : RISCVISD::STRICT_FSETCCS_VL;
10749 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10750 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10751 }
10752
10753 if (VT.isFixedLengthVector()) {
10754 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10755 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10756 }
10757 return Res;
10758}
10759
10760// Lower vector ABS to smax(X, sub(0, X)).
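// For example, abs(-3) = smax(-3, 0 - (-3)) = smax(-3, 3) = 3.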
10761SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10762 SDLoc DL(Op);
10763 MVT VT = Op.getSimpleValueType();
10764 SDValue X = Op.getOperand(0);
10765
10766 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10767 "Unexpected type for ISD::ABS");
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10773 }
10774
10775 SDValue Mask, VL;
10776 if (Op->getOpcode() == ISD::VP_ABS) {
10777 Mask = Op->getOperand(1);
10778 if (VT.isFixedLengthVector())
10779 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10780 Subtarget);
10781 VL = Op->getOperand(2);
10782 } else
10783 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10784
10785 SDValue SplatZero = DAG.getNode(
10786 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10787 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10788 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10789 DAG.getUNDEF(ContainerVT), Mask, VL);
10790 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10791 DAG.getUNDEF(ContainerVT), Mask, VL);
10792
10793 if (VT.isFixedLengthVector())
10794 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10795 return Max;
10796}
10797
10798SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10799 SDValue Op, SelectionDAG &DAG) const {
10800 SDLoc DL(Op);
10801 MVT VT = Op.getSimpleValueType();
10802 SDValue Mag = Op.getOperand(0);
10803 SDValue Sign = Op.getOperand(1);
10804 assert(Mag.getValueType() == Sign.getValueType() &&
10805 "Can only handle COPYSIGN with matching types.");
10806
10807 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10808 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10809 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10810
10811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10812
10813 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10814 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10815
10816 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 MVT VT = Op.getSimpleValueType();
10822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10823
10824 MVT I1ContainerVT =
10825 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10826
10827 SDValue CC =
10828 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10829 SDValue Op1 =
10830 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10831 SDValue Op2 =
10832 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10833
10834 SDLoc DL(Op);
10835 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10836
10837 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10838 Op2, DAG.getUNDEF(ContainerVT), VL);
10839
10840 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10841}
10842
10843SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10844 SelectionDAG &DAG) const {
10845 unsigned NewOpc = getRISCVVLOp(Op);
10846 bool HasMergeOp = hasMergeOp(NewOpc);
10847 bool HasMask = hasMaskOp(NewOpc);
10848
10849 MVT VT = Op.getSimpleValueType();
10850 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10851
10852 // Create list of operands by converting existing ones to scalable types.
10853 SmallVector<SDValue, 6> Ops;
10854 for (const SDValue &V : Op->op_values()) {
10855 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10856
10857 // Pass through non-vector operands.
10858 if (!V.getValueType().isVector()) {
10859 Ops.push_back(V);
10860 continue;
10861 }
10862
10863 // "cast" fixed length vector to a scalable vector.
10864 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10865 "Only fixed length vectors are supported!");
10866 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10867 }
10868
10869 SDLoc DL(Op);
10870 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10871 if (HasMergeOp)
10872 Ops.push_back(DAG.getUNDEF(ContainerVT));
10873 if (HasMask)
10874 Ops.push_back(Mask);
10875 Ops.push_back(VL);
10876
10877 // StrictFP operations have two result values. Their lowered result should
10878 // have the same result count.
10879 if (Op->isStrictFPOpcode()) {
10880 SDValue ScalableRes =
10881 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10882 Op->getFlags());
10883 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10884 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10885 }
10886
10887 SDValue ScalableRes =
10888 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10889 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10890}
10891
10892// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10893// * Operands of each node are assumed to be in the same order.
10894// * The EVL operand is promoted from i32 to i64 on RV64.
10895// * Fixed-length vectors are converted to their scalable-vector container
10896// types.
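// For example, assuming a 128-bit minimum VLEN, a vp.add on v4i32 operands
// becomes RISCVISD::ADD_VL on the nxv2i32 container type, with an undef merge
// operand inserted before the mask and the EVL used as the VL operand.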
10897SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10898 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10899 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10900
10901 SDLoc DL(Op);
10902 MVT VT = Op.getSimpleValueType();
10903 SmallVector<SDValue, 16> Ops;
10904
10905 MVT ContainerVT = VT;
10906 if (VT.isFixedLengthVector())
10907 ContainerVT = getContainerForFixedLengthVector(VT);
10908
10909 for (const auto &OpIdx : enumerate(Op->ops())) {
10910 SDValue V = OpIdx.value();
10911 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10912 // Add a dummy merge value before the mask, or before the EVL if there
10913 // isn't a mask.
10914 if (HasMergeOp) {
10915 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10916 if (MaskIdx) {
10917 if (*MaskIdx == OpIdx.index())
10918 Ops.push_back(DAG.getUNDEF(ContainerVT));
10919 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10920 OpIdx.index()) {
10921 if (Op.getOpcode() == ISD::VP_MERGE) {
10922 // For VP_MERGE, copy the false operand instead of an undef value.
10923 Ops.push_back(Ops.back());
10924 } else {
10925 assert(Op.getOpcode() == ISD::VP_SELECT);
10926 // For VP_SELECT, add an undef value.
10927 Ops.push_back(DAG.getUNDEF(ContainerVT));
10928 }
10929 }
10930 }
10931 // Pass through operands which aren't fixed-length vectors.
10932 if (!V.getValueType().isFixedLengthVector()) {
10933 Ops.push_back(V);
10934 continue;
10935 }
10936 // "cast" fixed length vector to a scalable vector.
10937 MVT OpVT = V.getSimpleValueType();
10938 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10939 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10940 "Only fixed length vectors are supported!");
10941 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10942 }
10943
10944 if (!VT.isFixedLengthVector())
10945 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10946
10947 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10948
10949 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10950}
10951
10952SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10953 SelectionDAG &DAG) const {
10954 SDLoc DL(Op);
10955 MVT VT = Op.getSimpleValueType();
10956
10957 SDValue Src = Op.getOperand(0);
10958 // NOTE: Mask is dropped.
10959 SDValue VL = Op.getOperand(2);
10960
10961 MVT ContainerVT = VT;
10962 if (VT.isFixedLengthVector()) {
10963 ContainerVT = getContainerForFixedLengthVector(VT);
10964 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10965 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10966 }
10967
10968 MVT XLenVT = Subtarget.getXLenVT();
10969 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10970 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10971 DAG.getUNDEF(ContainerVT), Zero, VL);
10972
10973 SDValue SplatValue = DAG.getConstant(
10974 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10975 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10976 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10977
10978 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10979 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10980 if (!VT.isFixedLengthVector())
10981 return Result;
10982 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VT = Op.getSimpleValueType();
10989
10990 SDValue Op1 = Op.getOperand(0);
10991 SDValue Op2 = Op.getOperand(1);
10992 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10993 // NOTE: Mask is dropped.
10994 SDValue VL = Op.getOperand(4);
10995
10996 MVT ContainerVT = VT;
10997 if (VT.isFixedLengthVector()) {
10998 ContainerVT = getContainerForFixedLengthVector(VT);
10999 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11000 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11001 }
11002
11003 SDValue Result;
11004 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11005
11006 switch (Condition) {
11007 default:
11008 break;
11009 // X != Y --> (X^Y)
11010 case ISD::SETNE:
11011 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11012 break;
11013 // X == Y --> ~(X^Y)
11014 case ISD::SETEQ: {
11015 SDValue Temp =
11016 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11017 Result =
11018 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11019 break;
11020 }
11021 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11022 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11023 case ISD::SETGT:
11024 case ISD::SETULT: {
11025 SDValue Temp =
11026 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11027 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11028 break;
11029 }
11030 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11031 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11032 case ISD::SETLT:
11033 case ISD::SETUGT: {
11034 SDValue Temp =
11035 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11036 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11037 break;
11038 }
11039 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11040 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11041 case ISD::SETGE:
11042 case ISD::SETULE: {
11043 SDValue Temp =
11044 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11045 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11046 break;
11047 }
11048 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11049 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11050 case ISD::SETLE:
11051 case ISD::SETUGE: {
11052 SDValue Temp =
11053 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11054 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11055 break;
11056 }
11057 }
11058
11059 if (!VT.isFixedLengthVector())
11060 return Result;
11061 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11062}
11063
11064// Lower Floating-Point/Integer Type-Convert VP SDNodes
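// For example, a vp.fptosi from nxv1f16 to nxv1i64 first extends the source to
// nxv1f32 with FP_EXTEND_VL and then converts, while a vp.sitofp from nxv1i64
// to nxv1f16 converts to f32 first and then rounds with FP_ROUND_VL.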
11065SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11066 SelectionDAG &DAG) const {
11067 SDLoc DL(Op);
11068
11069 SDValue Src = Op.getOperand(0);
11070 SDValue Mask = Op.getOperand(1);
11071 SDValue VL = Op.getOperand(2);
11072 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11073
11074 MVT DstVT = Op.getSimpleValueType();
11075 MVT SrcVT = Src.getSimpleValueType();
11076 if (DstVT.isFixedLengthVector()) {
11077 DstVT = getContainerForFixedLengthVector(DstVT);
11078 SrcVT = getContainerForFixedLengthVector(SrcVT);
11079 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11080 MVT MaskVT = getMaskTypeFor(DstVT);
11081 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11082 }
11083
11084 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11085 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11086
11087 SDValue Result;
11088 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11089 if (SrcVT.isInteger()) {
11090 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11091
11092 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11093 ? RISCVISD::VSEXT_VL
11094 : RISCVISD::VZEXT_VL;
11095
11096 // Do we need to do any pre-widening before converting?
11097 if (SrcEltSize == 1) {
11098 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11099 MVT XLenVT = Subtarget.getXLenVT();
11100 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11101 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11102 DAG.getUNDEF(IntVT), Zero, VL);
11103 SDValue One = DAG.getConstant(
11104 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11105 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11106 DAG.getUNDEF(IntVT), One, VL);
11107 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11108 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11109 } else if (DstEltSize > (2 * SrcEltSize)) {
11110 // Widen before converting.
11111 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11112 DstVT.getVectorElementCount());
11113 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11114 }
11115
11116 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11117 } else {
11118 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11119 "Wrong input/output vector types");
11120
11121 // Convert f16 to f32 then convert f32 to i64.
11122 if (DstEltSize > (2 * SrcEltSize)) {
11123 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11124 MVT InterimFVT =
11125 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11126 Src =
11127 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11128 }
11129
11130 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11131 }
11132 } else { // Narrowing + Conversion
11133 if (SrcVT.isInteger()) {
11134 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11135 // First do a narrowing convert to an FP type half the size, then round
11136 // the FP type to a small FP type if needed.
11137
11138 MVT InterimFVT = DstVT;
11139 if (SrcEltSize > (2 * DstEltSize)) {
11140 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11141 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11142 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11143 }
11144
11145 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11146
11147 if (InterimFVT != DstVT) {
11148 Src = Result;
11149 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11150 }
11151 } else {
11152 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11153 "Wrong input/output vector types");
11154 // First do a narrowing conversion to an integer half the size, then
11155 // truncate if needed.
11156
11157 if (DstEltSize == 1) {
11158 // First convert to the same size integer, then convert to mask using
11159 // setcc.
11160 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11161 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11162 DstVT.getVectorElementCount());
11163 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11164
11165 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11166 // otherwise the conversion was undefined.
11167 MVT XLenVT = Subtarget.getXLenVT();
11168 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11169 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11170 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11171 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11172 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11173 DAG.getUNDEF(DstVT), Mask, VL});
11174 } else {
11175 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11176 DstVT.getVectorElementCount());
11177
11178 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11179
11180 while (InterimIVT != DstVT) {
11181 SrcEltSize /= 2;
11182 Src = Result;
11183 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11184 DstVT.getVectorElementCount());
11185 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11186 Src, Mask, VL);
11187 }
11188 }
11189 }
11190 }
11191
11192 MVT VT = Op.getSimpleValueType();
11193 if (!VT.isFixedLengthVector())
11194 return Result;
11195 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11196}
11197
11198SDValue
11199RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11200 SelectionDAG &DAG) const {
11201 SDLoc DL(Op);
11202
11203 SDValue Op1 = Op.getOperand(0);
11204 SDValue Op2 = Op.getOperand(1);
11205 SDValue Offset = Op.getOperand(2);
11206 SDValue Mask = Op.getOperand(3);
11207 SDValue EVL1 = Op.getOperand(4);
11208 SDValue EVL2 = Op.getOperand(5);
11209
11210 const MVT XLenVT = Subtarget.getXLenVT();
11211 MVT VT = Op.getSimpleValueType();
11212 MVT ContainerVT = VT;
11213 if (VT.isFixedLengthVector()) {
11214 ContainerVT = getContainerForFixedLengthVector(VT);
11215 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11216 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11217 MVT MaskVT = getMaskTypeFor(ContainerVT);
11218 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11219 }
11220
11221 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11222 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11223
11224 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11225 if (IsMaskVector) {
11226 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11227
11228 // Expand input operands
11229 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11230 DAG.getUNDEF(ContainerVT),
11231 DAG.getConstant(1, DL, XLenVT), EVL1);
11232 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11233 DAG.getUNDEF(ContainerVT),
11234 DAG.getConstant(0, DL, XLenVT), EVL1);
11235 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11236 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11237
11238 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11239 DAG.getUNDEF(ContainerVT),
11240 DAG.getConstant(1, DL, XLenVT), EVL2);
11241 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11242 DAG.getUNDEF(ContainerVT),
11243 DAG.getConstant(0, DL, XLenVT), EVL2);
11244 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11245 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11246 }
11247
11248 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11249 SDValue DownOffset, UpOffset;
11250 if (ImmValue >= 0) {
11251 // The operand is a TargetConstant, we need to rebuild it as a regular
11252 // constant.
11253 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11254 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11255 } else {
11256 // The operand is a TargetConstant, we need to rebuild it as a regular
11257 // constant rather than negating the original operand.
11258 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11259 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11260 }
11261
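// The splice is a slidedown of Op1 by the (non-negative) offset over the
// first EVL1 - offset lanes, followed by a tail-agnostic slideup of Op2 at
// that position, e.g. for offset 2: { Op1[2..EVL1-1], Op2[0..EVL2-1] }.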
11262 SDValue SlideDown =
11263 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11264 Op1, DownOffset, Mask, UpOffset);
11265 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11266 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11267
11268 if (IsMaskVector) {
11269 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11270 Result = DAG.getNode(
11271 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11272 {Result, DAG.getConstant(0, DL, ContainerVT),
11273 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11274 Mask, EVL2});
11275 }
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(0);
11290 SDValue Mask = Op.getOperand(1);
11291 SDValue EVL = Op.getOperand(2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(ContainerVT);
11298 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11310 DAG.getUNDEF(IndicesVT),
11311 DAG.getConstant(1, DL, XLenVT), EVL);
11312 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11313 DAG.getUNDEF(IndicesVT),
11314 DAG.getConstant(0, DL, XLenVT), EVL);
11315 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11316 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11342 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11348 SDValue Result =
11349 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11359 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11365 {Result, DAG.getConstant(0, DL, GatherVT),
11366 DAG.getCondCode(ISD::SETNE),
11367 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
11380 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11381 SDValue VecLen =
11382 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11384 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11385 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11386 DAG.getUNDEF(IndicesVT), Mask, EVL);
11387 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11388 DAG.getUNDEF(GatherVT), Mask, EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 RISCVISD::SETCC_VL, DL, ContainerVT,
11394 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11395 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11401}
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop mask parameter as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(0);
11411 SDValue Op2 = Op->getOperand(1);
11412 SDValue VL = Op->getOperand(3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Mask);
11457 }
11458 Ops.push_back(VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11461 Ops.push_back(Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11466 VPNode->getMemoryVT(), VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues({Result, Chain}, DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Mask);
11504 }
11505 Ops.push_back(VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11508 Ops, VPNode->getMemoryVT(),
11509 VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513// matched to a RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
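// That is, each active lane i loads its element from BasePtr + Index[i],
// with Index[i] interpreted as an unsigned byte offset.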
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11551 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt-in for extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11566 ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(ContainerVT);
11572 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11582 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(DAG.getUNDEF(ContainerVT));
11590 else
11591 Ops.push_back(PassThru);
11592 Ops.push_back(BasePtr);
11593 Ops.push_back(Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Mask);
11596 Ops.push_back(VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues({Result, Chain}, DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11649 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt-in for extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11664 ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11667 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(ContainerVT);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11680 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11686 Ops.push_back(Val);
11687 Ops.push_back(BasePtr);
11688 Ops.push_back(Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Mask);
11691 Ops.push_back(VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11706
11707 // Encoding used for rounding mode in RISC-V differs from that used in
11708 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11709 // table, which consists of a sequence of 4-bit fields, each representing
11710 // corresponding FLT_ROUNDS mode.
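// For example, if FRM holds RTZ (1), the shift below is 4 and the extracted
// 4-bit field is the FLT_ROUNDS encoding for rounding toward zero.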
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11717
11718 SDValue Shift =
11719 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11720 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11721 DAG.getConstant(Table, DL, XLenVT), Shift);
11722 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11723 DAG.getConstant(7, DL, XLenVT));
11724
11725 return DAG.getMergeValues({Masked, Chain}, DL);
11726}
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(0);
11733 SDValue RMValue = Op->getOperand(1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // Encoding used for rounding mode in RISC-V differs from that used in
11738 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11739 // a table, which consists of a sequence of 4-bit fields, each representing
11740 // corresponding RISC-V mode.
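// For example, a request to round toward zero indexes the 4-bit field
// holding RISCVFPRndMode::RTZ, which is then written to the FRM register.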
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11747
11748 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11749
11750 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11751 DAG.getConstant(2, DL, XLenVT));
11752 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11753 DAG.getConstant(Table, DL, XLenVT), Shift);
11754 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11755 DAG.getConstant(0x7, DL, XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11768 return DAG.getFrameIndex(FI, PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11798// otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
11800// type i8/i16/i32 is lost.
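// For example, (i32 (srl X, Y)) is rewritten as
// (i32 (trunc (RISCVISD::SRLW (any_ext X), (any_ext Y)))).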
11801 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11810}
11811
11812 // Converts the given 32-bit operation to an i64 operation with sign extension
11813 // semantics to reduce the number of sign extension instructions.
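// For example, (i32 (add X, Y)) becomes
// (i32 (trunc (sext_inreg (add (any_ext X), (any_ext Y)), i32))).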
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11841 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(0);
11847 // In absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Res.getValue(1));
11862 return;
11863 }
11864 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
11865 // convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11885 else
11886 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11890 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11891 SDValue Result;
11892 std::tie(Result, Chain) =
11893 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11894 Results.push_back(Result);
11895 if (IsStrict)
11896 Results.push_back(Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(Op0VT))
11905 return;
11906
11907 // In absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11950 N->getOperand(0), LoCounter, HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(RCW.getValue(2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Res.getValue(1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(0);
11980 SDValue RHS = N->getOperand(1);
11981 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11985 // We need exactly one side to be unsigned.
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11992 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11993 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11994 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11996 };
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(N->getOperand(0)))
12027 break;
12028 Results.push_back(customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
12083 // Don't promote division/remainder by constant since we should expand those
12084 // to multiply by magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(N->getValueType(0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(N->getOperand(1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
12118 // For an addition, the result should be less than one of the operands (LHS)
12119 // if and only if the other operand (RHS) is negative, otherwise there will
12120 // be overflow.
12121 // For a subtraction, the result should be less than one of the operands
12122 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12123 // otherwise there will be overflow.
12124 EVT OType = N->getValueType(1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Overflow);
12132 return;
12133 }
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
12150 // The general case (X + C) < C is not necessarily beneficial. Although we
12151 // reduce the live range of X, we may introduce the materialization of
12152 // constant C, especially when the setcc result is used by a branch. We have
12153 // no instruction that compares against a constant and branches.
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12166 IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Overflow);
12171 return;
12172 }
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 "Unexpected custom legalisation");
12177 if (Subtarget.hasStdExtZbb()) {
12178 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12179 // sign extend allows overflow of the lower 32 bits to be detected on
12180 // the promoted size.
12181 SDValue LHS =
12182 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12183 SDValue RHS =
12184 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12185 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12187 return;
12188 }
12189
12190 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12191 // promotion for UADDO/USUBO.
12192 Results.push_back(expandAddSubSat(N, DAG));
12193 return;
12194 }
12195 case ISD::SADDSAT:
12196 case ISD::SSUBSAT: {
12197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12198 "Unexpected custom legalisation");
12199 Results.push_back(expandAddSubSat(N, DAG));
12200 return;
12201 }
12202 case ISD::ABS: {
12203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12204 "Unexpected custom legalisation");
12205
12206 if (Subtarget.hasStdExtZbb()) {
12207 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12208 // This allows us to remember that the result is sign extended. Expanding
12209 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12210 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12211 N->getOperand(0));
12212 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12213 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12214 return;
12215 }
12216
12217 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12218 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12219
12220 // Freeze the source so we can increase its use count.
12221 Src = DAG.getFreeze(Src);
12222
12223 // Copy sign bit to all bits using the sraiw pattern.
12224 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12225 DAG.getValueType(MVT::i32));
12226 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12227 DAG.getConstant(31, DL, MVT::i64));
12228
12229 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12230 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12231
12232 // NOTE: The result is only required to be anyextended, but sext is
12233 // consistent with type legalization of sub.
12234 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12235 DAG.getValueType(MVT::i32));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12237 return;
12238 }
12239 case ISD::BITCAST: {
12240 EVT VT = N->getValueType(0);
12241 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12242 SDValue Op0 = N->getOperand(0);
12243 EVT Op0VT = Op0.getValueType();
12244 MVT XLenVT = Subtarget.getXLenVT();
12245 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12246 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12247 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12248 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12249 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12250 Subtarget.hasStdExtZfbfmin()) {
12251 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12253 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12254 Subtarget.hasStdExtFOrZfinx()) {
12255 SDValue FPConv =
12256 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12258 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12259 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12260 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12261 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12262 NewReg.getValue(0), NewReg.getValue(1));
12263 Results.push_back(RetReg);
12264 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12265 isTypeLegal(Op0VT)) {
12266 // Custom-legalize bitcasts from fixed-length vector types to illegal
12267 // scalar types in order to improve codegen. Bitcast the vector to a
12268 // one-element vector type whose element type is the same as the result
12269 // type, and extract the first element.
12270 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12271 if (isTypeLegal(BVT)) {
12272 SDValue BVec = DAG.getBitcast(BVT, Op0);
12273 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12274 DAG.getVectorIdxConstant(0, DL)));
12275 }
12276 }
12277 break;
12278 }
12279 case RISCVISD::BREV8: {
12280 MVT VT = N->getSimpleValueType(0);
12281 MVT XLenVT = Subtarget.getXLenVT();
12282 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12283 "Unexpected custom legalisation");
12284 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12285 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12286 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12287 // ReplaceNodeResults requires we maintain the same type for the return
12288 // value.
12289 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12290 break;
12291 }
12292 case ISD::EXTRACT_VECTOR_ELT: {
12293 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12294 // type is illegal (currently only vXi64 RV32).
12295 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12296 // transferred to the destination register. We issue two of these from the
12297 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12298 // first element.
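// For example, extracting element Idx of a v2i64 on RV32 slides the vector
// down by Idx, reads the low 32 bits with vmv.x.s, shifts the element right
// by 32, reads again, and pairs the two halves into an i64.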
12299 SDValue Vec = N->getOperand(0);
12300 SDValue Idx = N->getOperand(1);
12301
12302 // The vector type hasn't been legalized yet so we can't issue target
12303 // specific nodes if it needs legalization.
12304 // FIXME: We would manually legalize if it's important.
12305 if (!isTypeLegal(Vec.getValueType()))
12306 return;
12307
12308 MVT VecVT = Vec.getSimpleValueType();
12309
12310 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12311 VecVT.getVectorElementType() == MVT::i64 &&
12312 "Unexpected EXTRACT_VECTOR_ELT legalization");
12313
12314 // If this is a fixed vector, we need to convert it to a scalable vector.
12315 MVT ContainerVT = VecVT;
12316 if (VecVT.isFixedLengthVector()) {
12317 ContainerVT = getContainerForFixedLengthVector(VecVT);
12318 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12319 }
12320
12321 MVT XLenVT = Subtarget.getXLenVT();
12322
12323 // Use a VL of 1 to avoid processing more elements than we need.
12324 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12325
12326 // Unless the index is known to be 0, we must slide the vector down to get
12327 // the desired element into index 0.
12328 if (!isNullConstant(Idx)) {
12329 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12330 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12331 }
12332
12333 // Extract the lower XLEN bits of the correct vector element.
12334 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12335
12336 // To extract the upper XLEN bits of the vector element, shift the first
12337 // element right by 32 bits and re-extract the lower XLEN bits.
12338 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12339 DAG.getUNDEF(ContainerVT),
12340 DAG.getConstant(32, DL, XLenVT), VL);
12341 SDValue LShr32 =
12342 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12343 DAG.getUNDEF(ContainerVT), Mask, VL);
12344
12345 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12346
12347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12348 break;
12349 }
12350 case ISD::INTRINSIC_WO_CHAIN: {
12351 unsigned IntNo = N->getConstantOperandVal(0);
12352 switch (IntNo) {
12353 default:
12354 llvm_unreachable(
12355 "Don't know how to custom type legalize this intrinsic!");
12356 case Intrinsic::experimental_get_vector_length: {
12357 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12358 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12359 return;
12360 }
12361 case Intrinsic::experimental_cttz_elts: {
12362 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12363 Results.push_back(
12364 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12365 return;
12366 }
12367 case Intrinsic::riscv_orc_b:
12368 case Intrinsic::riscv_brev8:
12369 case Intrinsic::riscv_sha256sig0:
12370 case Intrinsic::riscv_sha256sig1:
12371 case Intrinsic::riscv_sha256sum0:
12372 case Intrinsic::riscv_sha256sum1:
12373 case Intrinsic::riscv_sm3p0:
12374 case Intrinsic::riscv_sm3p1: {
12375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12376 return;
12377 unsigned Opc;
12378 switch (IntNo) {
12379 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12380 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12381 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12382 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12383 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12384 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12385 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12386 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12387 }
12388
12389 SDValue NewOp =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12393 return;
12394 }
12395 case Intrinsic::riscv_sm4ks:
12396 case Intrinsic::riscv_sm4ed: {
12397 unsigned Opc =
12398 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12399 SDValue NewOp0 =
12400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12401 SDValue NewOp1 =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12403 SDValue Res =
12404 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12406 return;
12407 }
12408 case Intrinsic::riscv_mopr: {
12409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12410 return;
12411 SDValue NewOp =
12412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12413 SDValue Res = DAG.getNode(
12414 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12415 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12417 return;
12418 }
12419 case Intrinsic::riscv_moprr: {
12420 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12421 return;
12422 SDValue NewOp0 =
12423 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12424 SDValue NewOp1 =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12426 SDValue Res = DAG.getNode(
12427 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12428 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12430 return;
12431 }
12432 case Intrinsic::riscv_clmul: {
12433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12434 return;
12435
12436 SDValue NewOp0 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12438 SDValue NewOp1 =
12439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12440 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12441 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12442 return;
12443 }
12444 case Intrinsic::riscv_clmulh:
12445 case Intrinsic::riscv_clmulr: {
12446 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12447 return;
12448
12449 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12450 // to the full 128-bit clmul result of multiplying two xlen values.
12451 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12452 // upper 32 bits.
12453 //
12454 // The alternative is to mask the inputs to 32 bits and use clmul, but
12455 // that requires two shifts to mask each input without zext.w.
12456 // FIXME: If the inputs are known zero extended or could be freely
12457 // zero extended, the mask form would be better.
12458 SDValue NewOp0 =
12459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12460 SDValue NewOp1 =
12461 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12462 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12463 DAG.getConstant(32, DL, MVT::i64));
12464 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12465 DAG.getConstant(32, DL, MVT::i64));
12466 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12467 : RISCVISD::CLMULR;
12468 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12469 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12470 DAG.getConstant(32, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 case Intrinsic::riscv_vmv_x_s: {
12475 EVT VT = N->getValueType(0);
12476 MVT XLenVT = Subtarget.getXLenVT();
12477 if (VT.bitsLT(XLenVT)) {
12478 // Simple case just extract using vmv.x.s and truncate.
12479 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12480 Subtarget.getXLenVT(), N->getOperand(1));
12481 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12482 return;
12483 }
12484
12485 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12486 "Unexpected custom legalization");
12487
12488 // We need to do the move in two steps.
12489 SDValue Vec = N->getOperand(1);
12490 MVT VecVT = Vec.getSimpleValueType();
12491
12492 // First extract the lower XLEN bits of the element.
12493 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12494
12495 // To extract the upper XLEN bits of the vector element, shift the first
12496 // element right by 32 bits and re-extract the lower XLEN bits.
12497 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12498
12499 SDValue ThirtyTwoV =
12500 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12501 DAG.getConstant(32, DL, XLenVT), VL);
12502 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12503 DAG.getUNDEF(VecVT), Mask, VL);
12504 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12505
12506 Results.push_back(
12507 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12508 break;
12509 }
12510 }
12511 break;
12512 }
12513 case ISD::VECREDUCE_ADD:
12514 case ISD::VECREDUCE_AND:
12515 case ISD::VECREDUCE_OR:
12516 case ISD::VECREDUCE_XOR:
12517 case ISD::VECREDUCE_SMAX:
12518 case ISD::VECREDUCE_UMAX:
12519 case ISD::VECREDUCE_SMIN:
12520 case ISD::VECREDUCE_UMIN:
12521 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12522 Results.push_back(V);
12523 break;
12524 case ISD::VP_REDUCE_ADD:
12525 case ISD::VP_REDUCE_AND:
12526 case ISD::VP_REDUCE_OR:
12527 case ISD::VP_REDUCE_XOR:
12528 case ISD::VP_REDUCE_SMAX:
12529 case ISD::VP_REDUCE_UMAX:
12530 case ISD::VP_REDUCE_SMIN:
12531 case ISD::VP_REDUCE_UMIN:
12532 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12533 Results.push_back(V);
12534 break;
12535 case ISD::GET_ROUNDING: {
12536 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12537 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12538 Results.push_back(Res.getValue(0));
12539 Results.push_back(Res.getValue(1));
12540 break;
12541 }
12542 }
12543}
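// Editorial sketch (not part of the upstream file): the identity behind the
// riscv_clmulh/riscv_clmulr legalization above, checked with plain C++ and no
// RISC-V intrinsics. The helper names (clmul128, clmulh32, ...) are
// illustrative only. Shifting both 32-bit inputs left by 32 puts the full
// 64-bit carry-less product into the upper half of the 128-bit product, so a
// 64-bit clmulh followed by a logical shift right by 32 yields the 32-bit
// clmulh; clmulr is handled analogously.
#include <cassert>
#include <cstdint>

// Low and high halves of the 128-bit carry-less product of two 64-bit values.
static void clmul128(uint64_t A, uint64_t B, uint64_t &Lo, uint64_t &Hi) {
  Lo = Hi = 0;
  for (unsigned I = 0; I < 64; ++I)
    if ((B >> I) & 1) {
      Lo ^= A << I;
      if (I != 0)
        Hi ^= A >> (64 - I);
    }
}

// clmulh on 32-bit inputs: bits [63:32] of their 64-bit carry-less product.
static uint32_t clmulh32(uint32_t A, uint32_t B) {
  uint64_t Lo, Hi;
  clmul128(A, B, Lo, Hi);
  return static_cast<uint32_t>(Lo >> 32);
}

static void checkClmulhLegalization(uint32_t A, uint32_t B) {
  // What the custom legalization computes on RV64:
  // truncate(srl(clmulh64(A << 32, B << 32), 32)).
  uint64_t Lo, Hi;
  clmul128(uint64_t(A) << 32, uint64_t(B) << 32, Lo, Hi);
  uint32_t Legalized = static_cast<uint32_t>(Hi >> 32);
  assert(Legalized == clmulh32(A, B));
}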
12544
12545/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12546/// which corresponds to it.
12547static unsigned getVecReduceOpcode(unsigned Opc) {
12548 switch (Opc) {
12549 default:
12550 llvm_unreachable("Unhandled binary to transform reduction");
12551 case ISD::ADD:
12552 return ISD::VECREDUCE_ADD;
12553 case ISD::UMAX:
12554 return ISD::VECREDUCE_UMAX;
12555 case ISD::SMAX:
12556 return ISD::VECREDUCE_SMAX;
12557 case ISD::UMIN:
12558 return ISD::VECREDUCE_UMIN;
12559 case ISD::SMIN:
12560 return ISD::VECREDUCE_SMIN;
12561 case ISD::AND:
12562 return ISD::VECREDUCE_AND;
12563 case ISD::OR:
12564 return ISD::VECREDUCE_OR;
12565 case ISD::XOR:
12566 return ISD::VECREDUCE_XOR;
12567 case ISD::FADD:
12568 // Note: This is the associative form of the generic reduction opcode.
12569 return ISD::VECREDUCE_FADD;
12570 }
12571}
12572
12573/// Perform two related transforms whose purpose is to incrementally recognize
12574/// an explode_vector followed by scalar reduction as a vector reduction node.
12575/// This exists to recover from a deficiency in SLP which can't handle
12576/// forests with multiple roots sharing common nodes. In some cases, one
12577/// of the trees will be vectorized, and the other will remain (unprofitably)
12578/// scalarized.
12579static SDValue
12580 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581 const RISCVSubtarget &Subtarget) {
12582
12583 // This transform needs to run before all integer types have been legalized
12584 // to i64 (so that the vector element type matches the add type), and while
12585 // it's safe to introduce odd sized vector types.
12586 if (DAG.NewNodesMustHaveLegalTypes)
12587 return SDValue();
12588
12589 // Without V, this transform isn't useful. We could form the (illegal)
12590 // operations and let them be scalarized again, but there's really no point.
12591 if (!Subtarget.hasVInstructions())
12592 return SDValue();
12593
12594 const SDLoc DL(N);
12595 const EVT VT = N->getValueType(0);
12596 const unsigned Opc = N->getOpcode();
12597
12598 // For FADD, we only handle the case with reassociation allowed. We
12599 // could handle strict reduction order, but at the moment, there's no
12600 // known reason to, and the complexity isn't worth it.
12601 // TODO: Handle fminnum and fmaxnum here
12602 if (!VT.isInteger() &&
12603 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12604 return SDValue();
12605
12606 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12607 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12608 "Inconsistent mappings");
12609 SDValue LHS = N->getOperand(0);
12610 SDValue RHS = N->getOperand(1);
12611
12612 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12613 return SDValue();
12614
12615 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12616 std::swap(LHS, RHS);
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12619 !isa<ConstantSDNode>(RHS.getOperand(1)))
12620 return SDValue();
12621
12622 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12623 SDValue SrcVec = RHS.getOperand(0);
12624 EVT SrcVecVT = SrcVec.getValueType();
12625 assert(SrcVecVT.getVectorElementType() == VT);
12626 if (SrcVecVT.isScalableVector())
12627 return SDValue();
12628
12629 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12630 return SDValue();
12631
12632 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12633 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12634 // root of our reduction tree. TODO: We could extend this to any two
12635 // adjacent aligned constant indices if desired.
12636 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12637 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12638 uint64_t LHSIdx =
12639 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12640 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12641 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12642 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12643 DAG.getVectorIdxConstant(0, DL));
12644 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12645 }
12646 }
12647
12648 // Match (binop (reduce (extract_subvector V, 0),
12649 // (extract_vector_elt V, sizeof(SubVec))))
12650 // into a reduction of one more element from the original vector V.
12651 if (LHS.getOpcode() != ReduceOpc)
12652 return SDValue();
12653
12654 SDValue ReduceVec = LHS.getOperand(0);
12655 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12656 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12657 isNullConstant(ReduceVec.getOperand(1)) &&
12658 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12659 // For illegal types (e.g. 3xi32), most will be combined again into a
12660 // wider (hopefully legal) type. If this is a terminal state, we are
12661 // relying on type legalization here to produce something reasonable
12662 // and this lowering quality could probably be improved. (TODO)
12663 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12664 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12665 DAG.getVectorIdxConstant(0, DL));
12666 auto Flags = ReduceVec->getFlags();
12667 Flags.intersectWith(N->getFlags());
12668 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12669 }
12670
12671 return SDValue();
12672}
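// Editorial sketch (not part of the upstream file): the scalar identity the
// two matches above rely on, written out for a plain integer add reduction.
// The helper names are illustrative. Because reduce(v[0..k)) + v[k] equals
// reduce(v[0..k+1)), each matched binop grows the reduction prefix by one
// element until the whole explode_vector has been absorbed.
#include <cassert>
#include <cstdint>
#include <vector>

static int64_t reducePrefix(const std::vector<int64_t> &V, size_t N) {
  int64_t Acc = 0;
  for (size_t I = 0; I < N; ++I)
    Acc += V[I];
  return Acc;
}

static void checkGrowByOne(const std::vector<int64_t> &V, size_t K) {
  // (binop (reduce (extract_subvector V, 0..K)), (extract_vector_elt V, K))
  // == (reduce (extract_subvector V, 0..K+1))
  assert(K < V.size());
  assert(reducePrefix(V, K) + V[K] == reducePrefix(V, K + 1));
}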
12673
12674
12675// Try to fold (<bop> x, (reduction.<bop> vec, start))
12676 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12677 const RISCVSubtarget &Subtarget) {
12678 auto BinOpToRVVReduce = [](unsigned Opc) {
12679 switch (Opc) {
12680 default:
12681 llvm_unreachable("Unhandled binary to transform reduction");
12682 case ISD::ADD:
12683 return RISCVISD::VECREDUCE_ADD_VL;
12684 case ISD::UMAX:
12685 return RISCVISD::VECREDUCE_UMAX_VL;
12686 case ISD::SMAX:
12687 return RISCVISD::VECREDUCE_SMAX_VL;
12688 case ISD::UMIN:
12689 return RISCVISD::VECREDUCE_UMIN_VL;
12690 case ISD::SMIN:
12691 return RISCVISD::VECREDUCE_SMIN_VL;
12692 case ISD::AND:
12693 return RISCVISD::VECREDUCE_AND_VL;
12694 case ISD::OR:
12695 return RISCVISD::VECREDUCE_OR_VL;
12696 case ISD::XOR:
12697 return RISCVISD::VECREDUCE_XOR_VL;
12698 case ISD::FADD:
12699 return RISCVISD::VECREDUCE_FADD_VL;
12700 case ISD::FMAXNUM:
12701 return RISCVISD::VECREDUCE_FMAX_VL;
12702 case ISD::FMINNUM:
12703 return RISCVISD::VECREDUCE_FMIN_VL;
12704 }
12705 };
12706
12707 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12708 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12709 isNullConstant(V.getOperand(1)) &&
12710 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12711 };
12712
12713 unsigned Opc = N->getOpcode();
12714 unsigned ReduceIdx;
12715 if (IsReduction(N->getOperand(0), Opc))
12716 ReduceIdx = 0;
12717 else if (IsReduction(N->getOperand(1), Opc))
12718 ReduceIdx = 1;
12719 else
12720 return SDValue();
12721
12722 // Skip if this is an FADD whose flags disallow reassociation, since this
12722 // combine requires it.
12723 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12724 return SDValue();
12725
12726 SDValue Extract = N->getOperand(ReduceIdx);
12727 SDValue Reduce = Extract.getOperand(0);
12728 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12729 return SDValue();
12730
12731 SDValue ScalarV = Reduce.getOperand(2);
12732 EVT ScalarVT = ScalarV.getValueType();
12733 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12734 ScalarV.getOperand(0)->isUndef() &&
12735 isNullConstant(ScalarV.getOperand(2)))
12736 ScalarV = ScalarV.getOperand(1);
12737
12738 // Make sure that ScalarV is a splat with VL=1.
12739 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12740 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12741 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12742 return SDValue();
12743
12744 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12745 return SDValue();
12746
12747 // Check that the scalar operand of ScalarV is the neutral element.
12748 // TODO: Deal with value other than neutral element.
12749 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12750 0))
12751 return SDValue();
12752
12753 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12754 // FIXME: We might be able to improve this if operand 0 is undef.
12755 if (!isNonZeroAVL(Reduce.getOperand(5)))
12756 return SDValue();
12757
12758 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12759
12760 SDLoc DL(N);
12761 SDValue NewScalarV =
12762 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12763 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12764
12765 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12766 if (ScalarVT != ScalarV.getValueType())
12767 NewScalarV =
12768 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12769 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12770
12771 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12772 NewScalarV, Reduce.getOperand(3),
12773 Reduce.getOperand(4), Reduce.getOperand(5)};
12774 SDValue NewReduce =
12775 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12776 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12777 Extract.getOperand(1));
12778}
12779
12780// Optimize (add (shl x, c0), (shl y, c1)) ->
12781 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
12782 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12783 const RISCVSubtarget &Subtarget) {
12784 // Perform this optimization only in the zba extension.
12785 if (!Subtarget.hasStdExtZba())
12786 return SDValue();
12787
12788 // Skip for vector types and larger types.
12789 EVT VT = N->getValueType(0);
12790 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12791 return SDValue();
12792
12793 // The two operand nodes must be SHL and have no other use.
12794 SDValue N0 = N->getOperand(0);
12795 SDValue N1 = N->getOperand(1);
12796 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12797 !N0->hasOneUse() || !N1->hasOneUse())
12798 return SDValue();
12799
12800 // Check c0 and c1.
12801 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12802 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12803 if (!N0C || !N1C)
12804 return SDValue();
12805 int64_t C0 = N0C->getSExtValue();
12806 int64_t C1 = N1C->getSExtValue();
12807 if (C0 <= 0 || C1 <= 0)
12808 return SDValue();
12809
12810 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12811 int64_t Bits = std::min(C0, C1);
12812 int64_t Diff = std::abs(C0 - C1);
12813 if (Diff != 1 && Diff != 2 && Diff != 3)
12814 return SDValue();
12815
12816 // Build nodes.
12817 SDLoc DL(N);
12818 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12819 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12820 SDValue NA0 =
12821 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12822 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12823 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12824}
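// Editorial sketch (not part of the upstream file): the arithmetic identity
// behind the rewrite above, for hypothetical shift amounts c0 = 5 and c1 = 7
// (Diff = 2, so the inner add becomes a SH2ADD). The helper name is
// illustrative only.
#include <cassert>
#include <cstdint>

static void checkAddShlImm(uint64_t X, uint64_t Y) {
  uint64_t Original = (X << 5) + (Y << 7);
  // (SLLI (SH2ADD y, x), 5): shift the larger-shift operand by the
  // difference, add the other operand, then shift by the smaller amount.
  uint64_t Rewritten = ((Y << 2) + X) << 5;
  assert(Original == Rewritten);
}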
12825
12826// Combine a constant select operand into its use:
12827//
12828// (and (select cond, -1, c), x)
12829// -> (select cond, x, (and x, c)) [AllOnes=1]
12830// (or (select cond, 0, c), x)
12831// -> (select cond, x, (or x, c)) [AllOnes=0]
12832// (xor (select cond, 0, c), x)
12833// -> (select cond, x, (xor x, c)) [AllOnes=0]
12834// (add (select cond, 0, c), x)
12835// -> (select cond, x, (add x, c)) [AllOnes=0]
12836// (sub x, (select cond, 0, c))
12837// -> (select cond, x, (sub x, c)) [AllOnes=0]
12838 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12839 SelectionDAG &DAG, bool AllOnes,
12840 const RISCVSubtarget &Subtarget) {
12841 EVT VT = N->getValueType(0);
12842
12843 // Skip vectors.
12844 if (VT.isVector())
12845 return SDValue();
12846
12847 if (!Subtarget.hasConditionalMoveFusion()) {
12848 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12849 if ((!Subtarget.hasStdExtZicond() &&
12850 !Subtarget.hasVendorXVentanaCondOps()) ||
12851 N->getOpcode() != ISD::AND)
12852 return SDValue();
12853
12854 // Possibly harmful when the condition has multiple uses.
12855 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12856 return SDValue();
12857
12858 // Maybe harmful when VT is wider than XLen.
12859 if (VT.getSizeInBits() > Subtarget.getXLen())
12860 return SDValue();
12861 }
12862
12863 if ((Slct.getOpcode() != ISD::SELECT &&
12864 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12865 !Slct.hasOneUse())
12866 return SDValue();
12867
12868 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12869 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12870 };
12871
12872 bool SwapSelectOps;
12873 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12874 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12875 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12876 SDValue NonConstantVal;
12877 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12878 SwapSelectOps = false;
12879 NonConstantVal = FalseVal;
12880 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12881 SwapSelectOps = true;
12882 NonConstantVal = TrueVal;
12883 } else
12884 return SDValue();
12885
12886 // Slct is now known to be the desired identity constant when CC is true.
12887 TrueVal = OtherOp;
12888 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12889 // Unless SwapSelectOps says the condition should be false.
12890 if (SwapSelectOps)
12891 std::swap(TrueVal, FalseVal);
12892
12893 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12894 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12895 {Slct.getOperand(0), Slct.getOperand(1),
12896 Slct.getOperand(2), TrueVal, FalseVal});
12897
12898 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12899 {Slct.getOperand(0), TrueVal, FalseVal});
12900}
12901
12902// Attempt combineSelectAndUse on each operand of a commutative operator N.
12903 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12904 bool AllOnes,
12905 const RISCVSubtarget &Subtarget) {
12906 SDValue N0 = N->getOperand(0);
12907 SDValue N1 = N->getOperand(1);
12908 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12909 return Result;
12910 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12911 return Result;
12912 return SDValue();
12913}
12914
12915// Transform (add (mul x, c0), c1) ->
12916// (add (mul (add x, c1/c0), c0), c1%c0).
12917// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12918// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12919// to an infinite loop in DAGCombine if transformed.
12920// Or transform (add (mul x, c0), c1) ->
12921// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12922// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12923// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12924// lead to an infinite loop in DAGCombine if transformed.
12925// Or transform (add (mul x, c0), c1) ->
12926// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12927// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12928// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12929// lead to an infinite loop in DAGCombine if transformed.
12930// Or transform (add (mul x, c0), c1) ->
12931// (mul (add x, c1/c0), c0).
12932// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
12933 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12934 const RISCVSubtarget &Subtarget) {
12935 // Skip for vector types and larger types.
12936 EVT VT = N->getValueType(0);
12937 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12938 return SDValue();
12939 // The first operand node must be a MUL and have no other use.
12940 SDValue N0 = N->getOperand(0);
12941 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12942 return SDValue();
12943 // Check if c0 and c1 match above conditions.
12944 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12945 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12946 if (!N0C || !N1C)
12947 return SDValue();
12948 // If N0C has multiple uses it's possible one of the cases in
12949 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12950 // in an infinite loop.
12951 if (!N0C->hasOneUse())
12952 return SDValue();
12953 int64_t C0 = N0C->getSExtValue();
12954 int64_t C1 = N1C->getSExtValue();
12955 int64_t CA, CB;
12956 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12957 return SDValue();
12958 // Search for proper CA (non-zero) and CB that both are simm12.
12959 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12960 !isInt<12>(C0 * (C1 / C0))) {
12961 CA = C1 / C0;
12962 CB = C1 % C0;
12963 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12964 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12965 CA = C1 / C0 + 1;
12966 CB = C1 % C0 - C0;
12967 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12968 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12969 CA = C1 / C0 - 1;
12970 CB = C1 % C0 + C0;
12971 } else
12972 return SDValue();
12973 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12974 SDLoc DL(N);
12975 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12976 DAG.getConstant(CA, DL, VT));
12977 SDValue New1 =
12978 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12979 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12980}
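// Editorial sketch (not part of the upstream file): the arithmetic behind the
// first case above, for hypothetical constants c0 = 100 and c1 = 12345.
// c1 does not fit in a signed 12-bit immediate, but c1/c0 = 123 and
// c1%c0 = 45 both do, and c0*(c1/c0) = 12300 does not (so DAGCombine will not
// undo the rewrite). The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkAddImmMulImm(int64_t X) {
  const int64_t C0 = 100, C1 = 12345;
  const int64_t CA = C1 / C0; // 123, fits in simm12
  const int64_t CB = C1 % C0; // 45, fits in simm12
  // (add (mul x, c0), c1) == (add (mul (add x, c1/c0), c0), c1%c0)
  assert(X * C0 + C1 == (X + CA) * C0 + CB);
}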
12981
12982// add (zext, zext) -> zext (add (zext, zext))
12983// sub (zext, zext) -> sext (sub (zext, zext))
12984// mul (zext, zext) -> zext (mul (zext, zext))
12985// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12986// udiv (zext, zext) -> zext (udiv (zext, zext))
12987// srem (zext, zext) -> zext (srem (zext, zext))
12988// urem (zext, zext) -> zext (urem (zext, zext))
12989//
12990 // where the sum of the extend widths match, and the range of the bin op
12991// fits inside the width of the narrower bin op. (For profitability on rvv, we
12992// use a power of two for both inner and outer extend.)
12993 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12994
12995 EVT VT = N->getValueType(0);
12996 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12997 return SDValue();
12998
12999 SDValue N0 = N->getOperand(0);
13000 SDValue N1 = N->getOperand(1);
13001 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13002 return SDValue();
13003 if (!N0.hasOneUse() || !N1.hasOneUse())
13004 return SDValue();
13005
13006 SDValue Src0 = N0.getOperand(0);
13007 SDValue Src1 = N1.getOperand(0);
13008 EVT SrcVT = Src0.getValueType();
13009 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13010 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13011 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13012 return SDValue();
13013
13014 LLVMContext &C = *DAG.getContext();
13015 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13016 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13017
13018 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13019 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13020
13021 // Src0 and Src1 are zero extended, so they're always positive if signed.
13022 //
13023 // sub can produce a negative result from two positive operands, so it needs
13024 // to be sign extended. The other nodes produce a positive result from two
13025 // positive operands, so zero extend instead.
13025 // zero extend instead.
13026 unsigned OuterExtend =
13027 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13028
13029 return DAG.getNode(
13030 OuterExtend, SDLoc(N), VT,
13031 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13032}
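// Editorial sketch (not part of the upstream file): why the narrowed SUB must
// be sign extended while the other ops can be zero extended, using i8 sources
// narrowed to an i16 binop instead of the original i32 one. The helper name is
// illustrative only.
#include <cassert>
#include <cstdint>

static void checkNarrowedSub(uint8_t A, uint8_t B) {
  uint32_t Wide = uint32_t(A) - uint32_t(B);          // sub of (zext i8 -> i32)
  uint16_t Narrow = uint16_t(uint16_t(A) - uint16_t(B)); // sub of (zext i8 -> i16)
  uint32_t SExt = uint32_t(int32_t(int16_t(Narrow)));    // sext i16 -> i32
  assert(Wide == SExt);
}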
13033
13034 // Try to turn (add (xor bool, 1), -1) into (neg bool).
13035 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13036 SDValue N0 = N->getOperand(0);
13037 SDValue N1 = N->getOperand(1);
13038 EVT VT = N->getValueType(0);
13039 SDLoc DL(N);
13040
13041 // RHS should be -1.
13042 if (!isAllOnesConstant(N1))
13043 return SDValue();
13044
13045 // Look for (xor X, 1).
13046 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13047 return SDValue();
13048
13049 // First xor input should be 0 or 1.
13050 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13051 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13052 return SDValue();
13053
13054 // Emit a negate of the setcc.
13055 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13056 N0.getOperand(0));
13057}
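// Editorial sketch (not part of the upstream file): the identity behind the
// fold above, valid because the xor input is known to be 0 or 1 and unsigned
// arithmetic wraps. The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkAddOfBooleanXor(uint64_t Bool01) {
  assert(Bool01 == 0 || Bool01 == 1);
  uint64_t Folded = uint64_t(0) - Bool01;        // (neg bool)
  uint64_t Original = (Bool01 ^ 1) + UINT64_MAX; // (add (xor bool, 1), -1)
  assert(Original == Folded);
}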
13058
13059 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13060 const RISCVSubtarget &Subtarget) {
13061 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13062 return V;
13063 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13064 return V;
13065 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13066 return V;
13067 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13068 return V;
13069 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13070 return V;
13071 if (SDValue V = combineBinOpOfZExt(N, DAG))
13072 return V;
13073
13074 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13075 // (select lhs, rhs, cc, x, (add x, y))
13076 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13077}
13078
13079 // Try to turn a sub with a boolean RHS and a constant LHS into an ADDI.
13080 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13081 SDValue N0 = N->getOperand(0);
13082 SDValue N1 = N->getOperand(1);
13083 EVT VT = N->getValueType(0);
13084 SDLoc DL(N);
13085
13086 // Require a constant LHS.
13087 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13088 if (!N0C)
13089 return SDValue();
13090
13091 // All our optimizations involve subtracting 1 from the immediate and forming
13092 // an ADDI. Make sure the new immediate is valid for an ADDI.
13093 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13094 if (!ImmValMinus1.isSignedIntN(12))
13095 return SDValue();
13096
13097 SDValue NewLHS;
13098 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13099 // (sub constant, (setcc x, y, eq/neq)) ->
13100 // (add (setcc x, y, neq/eq), constant - 1)
13101 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13102 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13103 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13104 return SDValue();
13105 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13106 NewLHS =
13107 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13108 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13109 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13110 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13111 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13112 NewLHS = N1.getOperand(0);
13113 } else
13114 return SDValue();
13115
13116 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13117 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13118}
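// Editorial sketch (not part of the upstream file): the identity behind
// folding (sub C, bool) into an ADDI, using the inverted boolean:
// C - b == !b + (C - 1) for b in {0, 1} (and C where C - 1 does not
// overflow). The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkSubOfBoolean(int64_t C, int64_t B) {
  assert(B == 0 || B == 1);
  assert(C - B == (B ^ 1) + (C - 1));
}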
13119
13120 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13121 const RISCVSubtarget &Subtarget) {
13122 if (SDValue V = combineSubOfBoolean(N, DAG))
13123 return V;
13124
13125 EVT VT = N->getValueType(0);
13126 SDValue N0 = N->getOperand(0);
13127 SDValue N1 = N->getOperand(1);
13128 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13129 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13130 isNullConstant(N1.getOperand(1))) {
13131 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13132 if (CCVal == ISD::SETLT) {
13133 SDLoc DL(N);
13134 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13135 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13136 DAG.getConstant(ShAmt, DL, VT));
13137 }
13138 }
13139
13140 if (SDValue V = combineBinOpOfZExt(N, DAG))
13141 return V;
13142
13143 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13144 // (select lhs, rhs, cc, x, (sub x, y))
13145 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13146}
13147
13148// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13149// Legalizing setcc can introduce xors like this. Doing this transform reduces
13150// the number of xors and may allow the xor to fold into a branch condition.
13151 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13152 SDValue N0 = N->getOperand(0);
13153 SDValue N1 = N->getOperand(1);
13154 bool IsAnd = N->getOpcode() == ISD::AND;
13155
13156 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13157 return SDValue();
13158
13159 if (!N0.hasOneUse() || !N1.hasOneUse())
13160 return SDValue();
13161
13162 SDValue N01 = N0.getOperand(1);
13163 SDValue N11 = N1.getOperand(1);
13164
13165 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13166 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13167 // operation is And, allow one of the Xors to use -1.
13168 if (isOneConstant(N01)) {
13169 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13170 return SDValue();
13171 } else if (isOneConstant(N11)) {
13172 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13173 if (!(IsAnd && isAllOnesConstant(N01)))
13174 return SDValue();
13175 } else
13176 return SDValue();
13177
13178 EVT VT = N->getValueType(0);
13179
13180 SDValue N00 = N0.getOperand(0);
13181 SDValue N10 = N1.getOperand(0);
13182
13183 // The LHS of the xors needs to be 0/1.
13184 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13185 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13186 return SDValue();
13187
13188 // Invert the opcode and insert a new xor.
13189 SDLoc DL(N);
13190 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13191 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13192 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13193}
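// Editorial sketch (not part of the upstream file): De Morgan's law as applied
// above for 0/1 values, written with the xor-by-1 form the legalized setccs
// actually use. The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkDeMorganOfBoolean(uint64_t X, uint64_t Y) {
  assert(X <= 1 && Y <= 1);
  // (and (xor x, 1), (xor y, 1)) -> (xor (or x, y), 1)
  assert(((X ^ 1) & (Y ^ 1)) == ((X | Y) ^ 1));
  // (or (xor x, 1), (xor y, 1)) -> (xor (and x, y), 1)
  assert(((X ^ 1) | (Y ^ 1)) == ((X & Y) ^ 1));
}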
13194
13195 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13196 const RISCVSubtarget &Subtarget) {
13197 SDValue N0 = N->getOperand(0);
13198 EVT VT = N->getValueType(0);
13199
13200 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13201 // extending X. This is safe since we only need the LSB after the shift and
13202 // shift amounts larger than 31 would produce poison. If we wait until
13203 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13204 // to use a BEXT instruction.
13205 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13206 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13207 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13208 SDLoc DL(N0);
13209 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13210 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13211 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13212 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13213 }
13214
13215 return SDValue();
13216}
13217
13218 // Combines two comparison operations and a logic operation into one selection
13219 // operation (min, max) and a logic operation. Returns the newly constructed
13220 // node if the conditions for the optimization are satisfied.
13221 static SDValue performANDCombine(SDNode *N,
13222 TargetLowering::DAGCombinerInfo &DCI,
13223 const RISCVSubtarget &Subtarget) {
13224 SelectionDAG &DAG = DCI.DAG;
13225
13226 SDValue N0 = N->getOperand(0);
13227 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13228 // extending X. This is safe since we only need the LSB after the shift and
13229 // shift amounts larger than 31 would produce poison. If we wait until
13230 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13231 // to use a BEXT instruction.
13232 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13233 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13234 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13235 N0.hasOneUse()) {
13236 SDLoc DL(N);
13237 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13238 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13239 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13240 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13241 DAG.getConstant(1, DL, MVT::i64));
13242 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13243 }
13244
13245 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13246 return V;
13247 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13248 return V;
13249
13250 if (DCI.isAfterLegalizeDAG())
13251 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13252 return V;
13253
13254 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13255 // (select lhs, rhs, cc, x, (and x, y))
13256 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13257}
13258
13259// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13260// FIXME: Generalize to other binary operators with same operand.
13261 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13262 SelectionDAG &DAG) {
13263 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13264
13265 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13266 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13267 !N0.hasOneUse() || !N1.hasOneUse())
13268 return SDValue();
13269
13270 // Should have the same condition.
13271 SDValue Cond = N0.getOperand(1);
13272 if (Cond != N1.getOperand(1))
13273 return SDValue();
13274
13275 SDValue TrueV = N0.getOperand(0);
13276 SDValue FalseV = N1.getOperand(0);
13277
13278 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13279 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13280 !isOneConstant(TrueV.getOperand(1)) ||
13281 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13282 return SDValue();
13283
13284 EVT VT = N->getValueType(0);
13285 SDLoc DL(N);
13286
13287 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13288 Cond);
13289 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13290 Cond);
13291 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13292 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13293}
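// Editorial sketch (not part of the upstream file): pulling the xor-by-1
// through the czero-based select idiom. czeroEqz/czeroNez model the Zicond
// semantics; the helper names are illustrative only.
#include <cassert>
#include <cstdint>

static uint64_t czeroEqz(uint64_t Val, uint64_t Cond) { return Cond != 0 ? Val : 0; }
static uint64_t czeroNez(uint64_t Val, uint64_t Cond) { return Cond == 0 ? Val : 0; }

static void checkOrOfCZERO(uint64_t A, uint64_t B, uint64_t Cond) {
  // select(cond, a ^ 1, b ^ 1) written with czero...
  uint64_t Before = czeroEqz(A ^ 1, Cond) | czeroNez(B ^ 1, Cond);
  // ...equals select(cond, a, b) ^ 1, letting the xor fold elsewhere.
  uint64_t After = (czeroEqz(A, Cond) | czeroNez(B, Cond)) ^ 1;
  assert(Before == After);
}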
13294
13295 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13296 const RISCVSubtarget &Subtarget) {
13297 SelectionDAG &DAG = DCI.DAG;
13298
13299 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13300 return V;
13301 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13302 return V;
13303
13304 if (DCI.isAfterLegalizeDAG())
13305 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13306 return V;
13307
13308 // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select
13309 // idiom. We may be able to pull a common operation out of the true and false values.
13310 SDValue N0 = N->getOperand(0);
13311 SDValue N1 = N->getOperand(1);
13312 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13313 return V;
13314 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13315 return V;
13316
13317 // fold (or (select cond, 0, y), x) ->
13318 // (select cond, x, (or x, y))
13319 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13320}
13321
13322 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13323 const RISCVSubtarget &Subtarget) {
13324 SDValue N0 = N->getOperand(0);
13325 SDValue N1 = N->getOperand(1);
13326
13327 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13328 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13329 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13331 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13332 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13333 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13334 SDLoc DL(N);
13335 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13336 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13337 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13338 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13339 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13340 }
13341
13342 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13343 // NOTE: Assumes ROL being legal means ROLW is legal.
13344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13345 if (N0.getOpcode() == RISCVISD::SLLW &&
13346 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13347 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13348 SDLoc DL(N);
13349 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13350 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13351 }
13352
13353 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13354 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13355 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13356 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13357 if (ConstN00 && CC == ISD::SETLT) {
13358 EVT VT = N0.getValueType();
13359 SDLoc DL(N0);
13360 const APInt &Imm = ConstN00->getAPIntValue();
13361 if ((Imm + 1).isSignedIntN(12))
13362 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13363 DAG.getConstant(Imm + 1, DL, VT), CC);
13364 }
13365 }
13366
13367 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13368 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13369 // would have been promoted to i32, but the setcc would have i64 result.
13370 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13371 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13372 SDValue N00 = N0.getOperand(0);
13373 SDLoc DL(N);
13374 SDValue LHS = N00.getOperand(0);
13375 SDValue RHS = N00.getOperand(1);
13376 SDValue CC = N00.getOperand(2);
13377 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13378 LHS.getValueType());
13379 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13380 LHS, RHS, NotCC);
13381 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13382 }
13383
13384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13385 return V;
13386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13387 return V;
13388
13389 // fold (xor (select cond, 0, y), x) ->
13390 // (select cond, x, (xor x, y))
13391 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13392}
13393
13394// Try to expand a scalar multiply to a faster sequence.
13395 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13396 TargetLowering::DAGCombinerInfo &DCI,
13397 const RISCVSubtarget &Subtarget) {
13398
13399 EVT VT = N->getValueType(0);
13400
13401 // LI + MUL is usually smaller than the alternative sequence.
13402 if (DAG.getMachineFunction().getFunction().hasMinSize())
13403 return SDValue();
13404
13405 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13406 return SDValue();
13407
13408 if (VT != Subtarget.getXLenVT())
13409 return SDValue();
13410
13411 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13412 return SDValue();
13413
13414 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13415 if (!CNode)
13416 return SDValue();
13417 uint64_t MulAmt = CNode->getZExtValue();
13418
13419 for (uint64_t Divisor : {3, 5, 9}) {
13420 if (MulAmt % Divisor != 0)
13421 continue;
13422 uint64_t MulAmt2 = MulAmt / Divisor;
13423 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13424 // Matched in tablegen, avoid perturbing patterns.
13425 if (isPowerOf2_64(MulAmt2))
13426 return SDValue();
13427
13428 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13429 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13430 SDLoc DL(N);
13431 SDValue X = DAG.getFreeze(N->getOperand(0));
13432 SDValue Mul359 =
13433 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13434 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13435 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13436 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13437 Mul359);
13438 }
13439 }
13440
13441 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13442 // shXadd. First check if this is a sum of two powers of 2 because that's
13443 // easy. Then count the trailing zeros up to the first set bit.
13444 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13445 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13446 if (ScaleShift >= 1 && ScaleShift < 4) {
13447 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13448 SDLoc DL(N);
13449 SDValue X = DAG.getFreeze(N->getOperand(0));
13450 SDValue Shift1 =
13451 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13452 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13453 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13454 }
13455 }
13456
13457 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13458 // This is the two instruction form; there are also three instruction
13459 // variants we could implement, e.g.:
13460 // (2^(1,2,3) * 3,5,9 + 1) << C2
13461 // 2^(C1>3) * 3,5,9 +/- 1
13462 for (uint64_t Divisor : {3, 5, 9}) {
13463 uint64_t C = MulAmt - 1;
13464 if (C <= Divisor)
13465 continue;
13466 unsigned TZ = llvm::countr_zero(C);
13467 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13468 SDLoc DL(N);
13469 SDValue X = DAG.getFreeze(N->getOperand(0));
13470 SDValue Mul359 =
13471 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13472 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13473 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13474 DAG.getConstant(TZ, DL, VT), X);
13475 }
13476 }
13477
13478 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13479 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13480 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13481 if (ScaleShift >= 1 && ScaleShift < 4) {
13482 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13483 SDLoc DL(N);
13484 SDValue X = DAG.getFreeze(N->getOperand(0));
13485 SDValue Shift1 =
13486 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13487 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13488 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13489 DAG.getConstant(ScaleShift, DL, VT), X));
13490 }
13491 }
13492
13493 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13494 for (uint64_t Offset : {3, 5, 9}) {
13495 if (isPowerOf2_64(MulAmt + Offset)) {
13496 SDLoc DL(N);
13497 SDValue Shift1 =
13498 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
13499 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13500 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, N->getOperand(0),
13501 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13502 N->getOperand(0));
13503 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13504 }
13505 }
13506
13507 return SDValue();
13508}
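// Editorial sketch (not part of the upstream file): the 3/5/9 * 3/5/9 case
// above, for a hypothetical multiplier of 45 = 5 * 9. shNadd models the Zba
// shNadd semantics, (x << N) + y, so two of them replace an LI + MUL pair.
// The helper names are illustrative only.
#include <cassert>
#include <cstdint>

static uint64_t shNadd(uint64_t X, unsigned N, uint64_t Y) { return (X << N) + Y; }

static void checkMul45(uint64_t X) {
  uint64_t Mul5 = shNadd(X, 2, X);        // sh2add x, x -> 5 * x
  uint64_t Mul45 = shNadd(Mul5, 3, Mul5); // sh3add t, t -> 9 * (5 * x)
  assert(Mul45 == X * 45);
}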
13509
13510
13511 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13512 TargetLowering::DAGCombinerInfo &DCI,
13513 const RISCVSubtarget &Subtarget) {
13514 EVT VT = N->getValueType(0);
13515 if (!VT.isVector())
13516 return expandMul(N, DAG, DCI, Subtarget);
13517
13518 SDLoc DL(N);
13519 SDValue N0 = N->getOperand(0);
13520 SDValue N1 = N->getOperand(1);
13521 SDValue MulOper;
13522 unsigned AddSubOpc;
13523
13524 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13525 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13526 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13527 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13528 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13529 AddSubOpc = V->getOpcode();
13530 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13531 SDValue Opnd = V->getOperand(1);
13532 MulOper = V->getOperand(0);
13533 if (AddSubOpc == ISD::SUB)
13534 std::swap(Opnd, MulOper);
13535 if (isOneOrOneSplat(Opnd))
13536 return true;
13537 }
13538 return false;
13539 };
13540
13541 if (IsAddSubWith1(N0)) {
13542 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13543 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13544 }
13545
13546 if (IsAddSubWith1(N1)) {
13547 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13548 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13549 }
13550
13551 if (SDValue V = combineBinOpOfZExt(N, DAG))
13552 return V;
13553
13554 return SDValue();
13555}
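// Editorial sketch (not part of the upstream file): the algebra behind the
// vmadd/vnmsub patterns above, elementwise on integers with wrapping
// arithmetic. The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkMulAddSubWithOne(uint64_t X, uint64_t Y) {
  assert((X + 1) * Y == X * Y + Y); // (mul (add x, 1), y) -> (add (mul x, y), y)
  assert((1 - X) * Y == Y - X * Y); // (mul (sub 1, x), y) -> (sub y, (mul x, y))
}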
13556
13557/// According to the property that indexed load/store instructions zero-extend
13558 /// their indices, try to narrow the type of the index operand.
13559static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13560 if (isIndexTypeSigned(IndexType))
13561 return false;
13562
13563 if (!N->hasOneUse())
13564 return false;
13565
13566 EVT VT = N.getValueType();
13567 SDLoc DL(N);
13568
13569 // In general, what we're doing here is seeing if we can sink a truncate to
13570 // a smaller element type into the expression tree building our index.
13571 // TODO: We can generalize this and handle a bunch more cases if useful.
13572
13573 // Narrow a buildvector to the narrowest element type. This requires less
13574 // work and less register pressure at high LMUL, and creates smaller constants
13575 // which may be cheaper to materialize.
13576 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13577 KnownBits Known = DAG.computeKnownBits(N);
13578 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13579 LLVMContext &C = *DAG.getContext();
13580 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13581 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13582 N = DAG.getNode(ISD::TRUNCATE, DL,
13583 VT.changeVectorElementType(ResultVT), N);
13584 return true;
13585 }
13586 }
13587
13588 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13589 if (N.getOpcode() != ISD::SHL)
13590 return false;
13591
13592 SDValue N0 = N.getOperand(0);
13593 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13594 N0.getOpcode() != RISCVISD::VZEXT_VL)
13595 return false;
13596 if (!N0->hasOneUse())
13597 return false;
13598
13599 APInt ShAmt;
13600 SDValue N1 = N.getOperand(1);
13601 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13602 return false;
13603
13604 SDValue Src = N0.getOperand(0);
13605 EVT SrcVT = Src.getValueType();
13606 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13607 unsigned ShAmtV = ShAmt.getZExtValue();
13608 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13609 NewElen = std::max(NewElen, 8U);
13610
13611 // Skip if NewElen is not narrower than the original extended type.
13612 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13613 return false;
13614
13615 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13616 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13617
13618 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13619 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13620 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13621 return true;
13622}
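// Editorial sketch (not part of the upstream file): the width computation for
// the (shl (zext x to ty), C) case above, for a hypothetical i8 index shifted
// left by 2. The helper names are illustrative only.
#include <algorithm>
#include <cassert>

static unsigned narrowedIndexElen(unsigned SrcElen, unsigned ShAmt) {
  // Models PowerOf2Ceil(SrcElen + ShAmt), clamped to at least 8 bits.
  unsigned Bits = SrcElen + ShAmt;
  unsigned Pow2 = 1;
  while (Pow2 < Bits)
    Pow2 *= 2;
  return std::max(Pow2, 8u);
}

static void checkNarrowIndexExample() {
  // An i8 value zero extended and shifted left by 2 needs only 10 bits, so the
  // index vector can use i16 elements instead of the original wider type.
  assert(narrowedIndexElen(8, 2) == 16);
}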
13623
13624// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13625// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13626// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13627// can become a sext.w instead of a shift pair.
13628 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13629 const RISCVSubtarget &Subtarget) {
13630 SDValue N0 = N->getOperand(0);
13631 SDValue N1 = N->getOperand(1);
13632 EVT VT = N->getValueType(0);
13633 EVT OpVT = N0.getValueType();
13634
13635 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13636 return SDValue();
13637
13638 // RHS needs to be a constant.
13639 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13640 if (!N1C)
13641 return SDValue();
13642
13643 // LHS needs to be (and X, 0xffffffff).
13644 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13645 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13646 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13647 return SDValue();
13648
13649 // Looking for an equality compare.
13650 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13651 if (!isIntEqualitySetCC(Cond))
13652 return SDValue();
13653
13654 // Don't do this if the sign bit is provably zero, it will be turned back into
13655 // an AND.
13656 APInt SignMask = APInt::getOneBitSet(64, 31);
13657 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13658 return SDValue();
13659
13660 const APInt &C1 = N1C->getAPIntValue();
13661
13662 SDLoc dl(N);
13663 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13664 // to be equal.
13665 if (C1.getActiveBits() > 32)
13666 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13667
13668 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13669 N0.getOperand(0), DAG.getValueType(MVT::i32));
13670 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13671 dl, OpVT), Cond);
13672}
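// Editorial sketch (not part of the upstream file): the equivalence used
// above, checked with plain 64-bit arithmetic. Comparing the zero-extended
// low 32 bits of X against C1 is the same as comparing the sign-extended low
// 32 bits against C1 sign extended from bit 31 (for C1 with at most 32 active
// bits). The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkSetccZextVsSext(uint64_t X, uint32_t C1) {
  bool ZextForm = (X & 0xffffffffu) == uint64_t(C1);
  int64_t SExtX = int64_t(int32_t(uint32_t(X))); // sext_inreg(X, i32)
  int64_t SExtC1 = int64_t(int32_t(C1));         // C1 sign extended from bit 31
  bool SextForm = SExtX == SExtC1;
  assert(ZextForm == SextForm);
}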
13673
13674static SDValue
13675 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13676 const RISCVSubtarget &Subtarget) {
13677 SDValue Src = N->getOperand(0);
13678 EVT VT = N->getValueType(0);
13679
13680 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13681 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13682 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13683 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13684 Src.getOperand(0));
13685
13686 return SDValue();
13687}
13688
13689namespace {
13690// Forward declaration of the structure holding the necessary information to
13691// apply a combine.
13692struct CombineResult;
13693
13694enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13695/// Helper class for folding sign/zero extensions.
13696/// In particular, this class is used for the following combines:
13697/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13698/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13699/// mul | mul_vl -> vwmul(u) | vwmul_su
13700/// shl | shl_vl -> vwsll
13701/// fadd -> vfwadd | vfwadd_w
13702/// fsub -> vfwsub | vfwsub_w
13703/// fmul -> vfwmul
13704/// An object of this class represents an operand of the operation we want to
13705/// combine.
13706/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13707/// NodeExtensionHelper for `a` and one for `b`.
13708///
13709/// This class abstracts away how the extension is materialized and
13710/// how its number of users affect the combines.
13711///
13712/// In particular:
13713/// - VWADD_W is conceptually == add(op0, sext(op1))
13714/// - VWADDU_W == add(op0, zext(op1))
13715/// - VWSUB_W == sub(op0, sext(op1))
13716/// - VWSUBU_W == sub(op0, zext(op1))
13717/// - VFWADD_W == fadd(op0, fpext(op1))
13718/// - VFWSUB_W == fsub(op0, fpext(op1))
13719/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13720/// zext|sext(smaller_value).
13721struct NodeExtensionHelper {
13722 /// Records if this operand is like being zero extended.
13723 bool SupportsZExt;
13724 /// Records if this operand is like being sign extended.
13725 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13726 /// instance, a splat constant (e.g., 3), would support being both sign and
13727 /// zero extended.
13728 bool SupportsSExt;
13729 /// Records if this operand is like being floating-point extended.
13730 bool SupportsFPExt;
13731 /// This boolean captures whether we care if this operand would still be
13732 /// around after the folding happens.
13733 bool EnforceOneUse;
13734 /// Original value that this NodeExtensionHelper represents.
13735 SDValue OrigOperand;
13736
13737 /// Get the value feeding the extension or the value itself.
13738 /// E.g., for zext(a), this would return a.
13739 SDValue getSource() const {
13740 switch (OrigOperand.getOpcode()) {
13741 case ISD::ZERO_EXTEND:
13742 case ISD::SIGN_EXTEND:
13743 case RISCVISD::VSEXT_VL:
13744 case RISCVISD::VZEXT_VL:
13745 case RISCVISD::FP_EXTEND_VL:
13746 return OrigOperand.getOperand(0);
13747 default:
13748 return OrigOperand;
13749 }
13750 }
13751
13752 /// Check if this instance represents a splat.
13753 bool isSplat() const {
13754 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13755 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13756 }
13757
13758 /// Get the extended opcode.
13759 unsigned getExtOpc(ExtKind SupportsExt) const {
13760 switch (SupportsExt) {
13761 case ExtKind::SExt:
13762 return RISCVISD::VSEXT_VL;
13763 case ExtKind::ZExt:
13764 return RISCVISD::VZEXT_VL;
13765 case ExtKind::FPExt:
13766 return RISCVISD::FP_EXTEND_VL;
13767 }
13768 llvm_unreachable("Unknown ExtKind enum");
13769 }
13770
13771 /// Get or create a value that can feed \p Root with the given extension \p
13772 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13773 /// operand. \see ::getSource().
13774 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13775 const RISCVSubtarget &Subtarget,
13776 std::optional<ExtKind> SupportsExt) const {
13777 if (!SupportsExt.has_value())
13778 return OrigOperand;
13779
13780 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13781
13782 SDValue Source = getSource();
13783 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13784 if (Source.getValueType() == NarrowVT)
13785 return Source;
13786
13787 unsigned ExtOpc = getExtOpc(*SupportsExt);
13788
13789 // If we need an extension, we should be changing the type.
13790 SDLoc DL(OrigOperand);
13791 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13792 switch (OrigOperand.getOpcode()) {
13793 case ISD::ZERO_EXTEND:
13794 case ISD::SIGN_EXTEND:
13795 case RISCVISD::VSEXT_VL:
13796 case RISCVISD::VZEXT_VL:
13797 case RISCVISD::FP_EXTEND_VL:
13798 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13799 case ISD::SPLAT_VECTOR:
13800 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13801 case RISCVISD::VMV_V_X_VL:
13802 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13803 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13804 default:
13805 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13806 // and that operand should already have the right NarrowVT so no
13807 // extension should be required at this point.
13808 llvm_unreachable("Unsupported opcode");
13809 }
13810 }
13811
13812 /// Helper function to get the narrow type for \p Root.
13813 /// The narrow type is the type of \p Root where we divided the size of each
13814 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13815 /// \pre Both the narrow type and the original type should be legal.
13816 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13817 MVT VT = Root->getSimpleValueType(0);
13818
13819 // Determine the narrow size.
13820 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13821
13822 MVT EltVT = SupportsExt == ExtKind::FPExt
13823 ? MVT::getFloatingPointVT(NarrowSize)
13824 : MVT::getIntegerVT(NarrowSize);
13825
13826 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13827 "Trying to extend something we can't represent");
13828 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13829 return NarrowVT;
13830 }
13831
13832 /// Get the opcode to materialize:
13833 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13834 static unsigned getSExtOpcode(unsigned Opcode) {
13835 switch (Opcode) {
13836 case ISD::ADD:
13837 case RISCVISD::ADD_VL:
13838 case RISCVISD::VWADD_W_VL:
13839 case RISCVISD::VWADDU_W_VL:
13840 case ISD::OR:
13841 return RISCVISD::VWADD_VL;
13842 case ISD::SUB:
13843 case RISCVISD::SUB_VL:
13844 case RISCVISD::VWSUB_W_VL:
13845 case RISCVISD::VWSUBU_W_VL:
13846 return RISCVISD::VWSUB_VL;
13847 case ISD::MUL:
13848 case RISCVISD::MUL_VL:
13849 return RISCVISD::VWMUL_VL;
13850 default:
13851 llvm_unreachable("Unexpected opcode");
13852 }
13853 }
13854
13855 /// Get the opcode to materialize:
13856 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13857 static unsigned getZExtOpcode(unsigned Opcode) {
13858 switch (Opcode) {
13859 case ISD::ADD:
13860 case RISCVISD::ADD_VL:
13861 case RISCVISD::VWADD_W_VL:
13862 case RISCVISD::VWADDU_W_VL:
13863 case ISD::OR:
13864 return RISCVISD::VWADDU_VL;
13865 case ISD::SUB:
13866 case RISCVISD::SUB_VL:
13867 case RISCVISD::VWSUB_W_VL:
13868 case RISCVISD::VWSUBU_W_VL:
13869 return RISCVISD::VWSUBU_VL;
13870 case ISD::MUL:
13871 case RISCVISD::MUL_VL:
13872 return RISCVISD::VWMULU_VL;
13873 case ISD::SHL:
13874 case RISCVISD::SHL_VL:
13875 return RISCVISD::VWSLL_VL;
13876 default:
13877 llvm_unreachable("Unexpected opcode");
13878 }
13879 }
13880
13881 /// Get the opcode to materialize:
13882 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13883 static unsigned getFPExtOpcode(unsigned Opcode) {
13884 switch (Opcode) {
13885 case RISCVISD::FADD_VL:
13886 case RISCVISD::VFWADD_W_VL:
13887 return RISCVISD::VFWADD_VL;
13888 case RISCVISD::FSUB_VL:
13889 case RISCVISD::VFWSUB_W_VL:
13890 return RISCVISD::VFWSUB_VL;
13891 case RISCVISD::FMUL_VL:
13892 return RISCVISD::VFWMUL_VL;
13893 default:
13894 llvm_unreachable("Unexpected opcode");
13895 }
13896 }
13897
13898 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13899 /// newOpcode(a, b).
13900 static unsigned getSUOpcode(unsigned Opcode) {
13901 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13902 "SU is only supported for MUL");
13903 return RISCVISD::VWMULSU_VL;
13904 }
13905
13906 /// Get the opcode to materialize
13907 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13908 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13909 switch (Opcode) {
13910 case ISD::ADD:
13911 case RISCVISD::ADD_VL:
13912 case ISD::OR:
13913 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13914 : RISCVISD::VWADDU_W_VL;
13915 case ISD::SUB:
13916 case RISCVISD::SUB_VL:
13917 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13918 : RISCVISD::VWSUBU_W_VL;
13919 case RISCVISD::FADD_VL:
13920 return RISCVISD::VFWADD_W_VL;
13921 case RISCVISD::FSUB_VL:
13922 return RISCVISD::VFWSUB_W_VL;
13923 default:
13924 llvm_unreachable("Unexpected opcode");
13925 }
13926 }
13927
13928 using CombineToTry = std::function<std::optional<CombineResult>(
13929 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13930 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13931 const RISCVSubtarget &)>;
13932
13933 /// Check if this node needs to be fully folded or extended for all users.
13934 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13935
13936 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13937 const RISCVSubtarget &Subtarget) {
13938 unsigned Opc = OrigOperand.getOpcode();
13939 MVT VT = OrigOperand.getSimpleValueType();
13940
13941 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13942 "Unexpected Opcode");
13943
13944 // The passthru must be undef for tail agnostic.
13945 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13946 return;
13947
13948 // Get the scalar value.
13949 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13950 : OrigOperand.getOperand(1);
13951
13952 // See if we have enough sign bits or zero bits in the scalar to use a
13953 // widening opcode by splatting to smaller element size.
13954 unsigned EltBits = VT.getScalarSizeInBits();
13955 unsigned ScalarBits = Op.getValueSizeInBits();
13956 // Make sure we're getting all element bits from the scalar register.
13957 // FIXME: Support implicit sign extension of vmv.v.x?
13958 if (ScalarBits < EltBits)
13959 return;
13960
13961 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13962 // If the narrow type cannot be expressed with a legal VMV,
13963 // this is not a valid candidate.
13964 if (NarrowSize < 8)
13965 return;
13966
13967 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13968 SupportsSExt = true;
13969
13970 if (DAG.MaskedValueIsZero(Op,
13971 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13972 SupportsZExt = true;
13973
13974 EnforceOneUse = false;
13975 }
13976
13977 /// Helper method to set the various fields of this struct based on the
13978 /// type of \p Root.
13979 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13980 const RISCVSubtarget &Subtarget) {
13981 SupportsZExt = false;
13982 SupportsSExt = false;
13983 SupportsFPExt = false;
13984 EnforceOneUse = true;
13985 unsigned Opc = OrigOperand.getOpcode();
13986 // For the nodes we handle below, we end up using their inputs directly: see
13987 // getSource(). However since they either don't have a passthru or we check
13988 // that their passthru is undef, we can safely ignore their mask and VL.
13989 switch (Opc) {
13990 case ISD::ZERO_EXTEND:
13991 case ISD::SIGN_EXTEND: {
13992 MVT VT = OrigOperand.getSimpleValueType();
13993 if (!VT.isVector())
13994 break;
13995
13996 SDValue NarrowElt = OrigOperand.getOperand(0);
13997 MVT NarrowVT = NarrowElt.getSimpleValueType();
13998 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
13999 if (NarrowVT.getVectorElementType() == MVT::i1)
14000 break;
14001
14002 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14003 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14004 break;
14005 }
14006 case RISCVISD::VZEXT_VL:
14007 SupportsZExt = true;
14008 break;
14009 case RISCVISD::VSEXT_VL:
14010 SupportsSExt = true;
14011 break;
14012 case RISCVISD::FP_EXTEND_VL:
14013 SupportsFPExt = true;
14014 break;
14015 case ISD::SPLAT_VECTOR:
14016 case RISCVISD::VMV_V_X_VL:
14017 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14018 break;
14019 default:
14020 break;
14021 }
14022 }
14023
14024 /// Check if \p Root supports any extension folding combines.
14025 static bool isSupportedRoot(const SDNode *Root,
14026 const RISCVSubtarget &Subtarget) {
14027 switch (Root->getOpcode()) {
14028 case ISD::ADD:
14029 case ISD::SUB:
14030 case ISD::MUL: {
14031 return Root->getValueType(0).isScalableVector();
14032 }
14033 case ISD::OR: {
14034 return Root->getValueType(0).isScalableVector() &&
14035 Root->getFlags().hasDisjoint();
14036 }
14037 // Vector Widening Integer Add/Sub/Mul Instructions
14038 case RISCVISD::ADD_VL:
14039 case RISCVISD::MUL_VL:
14040 case RISCVISD::VWADD_W_VL:
14041 case RISCVISD::VWADDU_W_VL:
14042 case RISCVISD::SUB_VL:
14043 case RISCVISD::VWSUB_W_VL:
14044 case RISCVISD::VWSUBU_W_VL:
14045 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14046 case RISCVISD::FADD_VL:
14047 case RISCVISD::FSUB_VL:
14048 case RISCVISD::FMUL_VL:
14049 case RISCVISD::VFWADD_W_VL:
14050 case RISCVISD::VFWSUB_W_VL:
14051 return true;
14052 case ISD::SHL:
14053 return Root->getValueType(0).isScalableVector() &&
14054 Subtarget.hasStdExtZvbb();
14055 case RISCVISD::SHL_VL:
14056 return Subtarget.hasStdExtZvbb();
14057 default:
14058 return false;
14059 }
14060 }
14061
14062 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14063 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14064 const RISCVSubtarget &Subtarget) {
14065 assert(isSupportedRoot(Root, Subtarget) &&
14066 "Trying to build a helper with an "
14067 "unsupported root");
14068 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14070 OrigOperand = Root->getOperand(OperandIdx);
14071
14072 unsigned Opc = Root->getOpcode();
14073 switch (Opc) {
14074 // We consider
14075 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14076 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14077 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14078 case RISCVISD::VWADD_W_VL:
14079 case RISCVISD::VWADDU_W_VL:
14080 case RISCVISD::VWSUB_W_VL:
14081 case RISCVISD::VWSUBU_W_VL:
14082 case RISCVISD::VFWADD_W_VL:
14083 case RISCVISD::VFWSUB_W_VL:
14084 if (OperandIdx == 1) {
14085 SupportsZExt =
14086 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14087 SupportsSExt =
14088 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14089 SupportsFPExt =
14090 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14091 // There's no existing extension here, so we don't have to worry about
14092 // making sure it gets removed.
14093 EnforceOneUse = false;
14094 break;
14095 }
14096 [[fallthrough]];
14097 default:
14098 fillUpExtensionSupport(Root, DAG, Subtarget);
14099 break;
14100 }
14101 }
14102
14103 /// Helper function to get the Mask and VL from \p Root.
14104 static std::pair<SDValue, SDValue>
14105 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14106 const RISCVSubtarget &Subtarget) {
14107 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14108 switch (Root->getOpcode()) {
14109 case ISD::ADD:
14110 case ISD::SUB:
14111 case ISD::MUL:
14112 case ISD::OR:
14113 case ISD::SHL: {
14114 SDLoc DL(Root);
14115 MVT VT = Root->getSimpleValueType(0);
14116 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14117 }
14118 default:
14119 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14120 }
14121 }
14122
14123 /// Helper function to check if \p N is commutative with respect to the
14124 /// foldings that are supported by this class.
14125 static bool isCommutative(const SDNode *N) {
14126 switch (N->getOpcode()) {
14127 case ISD::ADD:
14128 case ISD::MUL:
14129 case ISD::OR:
14130 case RISCVISD::ADD_VL:
14131 case RISCVISD::MUL_VL:
14132 case RISCVISD::VWADD_W_VL:
14133 case RISCVISD::VWADDU_W_VL:
14134 case RISCVISD::FADD_VL:
14135 case RISCVISD::FMUL_VL:
14136 case RISCVISD::VFWADD_W_VL:
14137 return true;
14138 case ISD::SUB:
14139 case RISCVISD::SUB_VL:
14140 case RISCVISD::VWSUB_W_VL:
14141 case RISCVISD::VWSUBU_W_VL:
14142 case RISCVISD::FSUB_VL:
14143 case RISCVISD::VFWSUB_W_VL:
14144 case ISD::SHL:
14145 case RISCVISD::SHL_VL:
14146 return false;
14147 default:
14148 llvm_unreachable("Unexpected opcode");
14149 }
14150 }
14151
14152 /// Get a list of combine to try for folding extensions in \p Root.
14153 /// Note that each returned CombineToTry function doesn't actually modify
14154 /// anything. Instead, they produce an optional CombineResult that, if not None,
14155 /// needs to be materialized for the combine to be applied.
14156 /// \see CombineResult::materialize.
14157 /// If the related CombineToTry function returns std::nullopt, that means the
14158 /// combine didn't match.
14159 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14160};
14161
14162/// Helper structure that holds all the necessary information to materialize a
14163/// combine that does some extension folding.
14164struct CombineResult {
14165 /// Opcode to be generated when materializing the combine.
14166 unsigned TargetOpcode;
14167 // No value means no extension is needed.
14168 std::optional<ExtKind> LHSExt;
14169 std::optional<ExtKind> RHSExt;
14170 /// Root of the combine.
14171 SDNode *Root;
14172 /// LHS of the TargetOpcode.
14173 NodeExtensionHelper LHS;
14174 /// RHS of the TargetOpcode.
14175 NodeExtensionHelper RHS;
14176
14177 CombineResult(unsigned TargetOpcode, SDNode *Root,
14178 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14179 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14180 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14181 LHS(LHS), RHS(RHS) {}
14182
14183 /// Return a value that uses TargetOpcode and that can be used to replace
14184 /// Root.
14185 /// The actual replacement is *not* done in that method.
14186 SDValue materialize(SelectionDAG &DAG,
14187 const RISCVSubtarget &Subtarget) const {
14188 SDValue Mask, VL, Merge;
14189 std::tie(Mask, VL) =
14190 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14191 switch (Root->getOpcode()) {
14192 default:
14193 Merge = Root->getOperand(2);
14194 break;
14195 case ISD::ADD:
14196 case ISD::SUB:
14197 case ISD::MUL:
14198 case ISD::OR:
14199 case ISD::SHL:
14200 Merge = DAG.getUNDEF(Root->getValueType(0));
14201 break;
14202 }
14203 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14204 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14205 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14206 Merge, Mask, VL);
14207 }
14208};
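// For example, if Root is an add whose two operands were both reported as
// sign-extendable, a CombineResult carrying the corresponding vwadd opcode
// materializes roughly as
//   (vwadd narrowLHS, narrowRHS, Merge, Mask, VL)
// where Merge is undef for the generic ISD roots and the original merge
// operand for the *_VL roots.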
14209
14210/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14211/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14212/// are zext) and LHS and RHS can be folded into Root.
14213/// AllowExtMask defines which form `ext` can take in this pattern.
14214///
14215/// \note If the pattern can match with both zext and sext, the returned
14216/// CombineResult will feature the zext result.
14217///
14218/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14219/// can be used to apply the pattern.
14220static std::optional<CombineResult>
14221canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14222 const NodeExtensionHelper &RHS,
14223 uint8_t AllowExtMask, SelectionDAG &DAG,
14224 const RISCVSubtarget &Subtarget) {
14225 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14226 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14227 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14228 /*RHSExt=*/{ExtKind::ZExt});
14229 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14230 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14231 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14232 /*RHSExt=*/{ExtKind::SExt});
14233 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14234 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14235 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14236 /*RHSExt=*/{ExtKind::FPExt});
14237 return std::nullopt;
14238}
14239
14240/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14241/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14242/// are zext) and LHS and RHS can be folded into Root.
14243///
14244/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14245/// can be used to apply the pattern.
14246static std::optional<CombineResult>
14247canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14248 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14249 const RISCVSubtarget &Subtarget) {
14250 return canFoldToVWWithSameExtensionImpl(
14251 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14252 Subtarget);
14253}
14254
14255/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14256///
14257/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14258/// can be used to apply the pattern.
14259static std::optional<CombineResult>
14260canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14261 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14262 const RISCVSubtarget &Subtarget) {
14263 if (RHS.SupportsFPExt)
14264 return CombineResult(
14265 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14266 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14267
14268 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14269 // sext/zext?
14270 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14271 // purposes.
14272 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14273 return CombineResult(
14274 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14275 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14276 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14277 return CombineResult(
14278 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14279 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14280 return std::nullopt;
14281}
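// E.g. for an add root where only the RHS is extended, this reports the _W
// variant, roughly:
//   (add x, (sext y))   -> vwadd.wv-style node (x, y)
//   (add x, (zext y))   -> vwaddu.wv-style node (x, y)
//   (fadd x, (fpext y)) -> vfwadd.wv-style node (x, y)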
14282
14283/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14284///
14285/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14286/// can be used to apply the pattern.
14287static std::optional<CombineResult>
14288canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14289 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14290 const RISCVSubtarget &Subtarget) {
14291 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14292 Subtarget);
14293}
14294
14295/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14296///
14297/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14298/// can be used to apply the pattern.
14299static std::optional<CombineResult>
14300canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14301 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14302 const RISCVSubtarget &Subtarget) {
14303 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14304 Subtarget);
14305}
14306
14307/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14308///
14309/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14310/// can be used to apply the pattern.
14311static std::optional<CombineResult>
14312canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14313 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14314 const RISCVSubtarget &Subtarget) {
14315 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14316 Subtarget);
14317}
14318
14319/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14320///
14321/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14322/// can be used to apply the pattern.
14323static std::optional<CombineResult>
14324canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14325 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14326 const RISCVSubtarget &Subtarget) {
14327
14328 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14329 return std::nullopt;
14330 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14331 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14332 /*RHSExt=*/{ExtKind::ZExt});
14333}
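// E.g. (mul (sext a), (zext b)) matches here and is rewritten to the vwmulsu
// form, which is the "mul -> vwmulsu" strategy listed in getSupportedFoldings
// below.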
14334
14335SmallVector<NodeExtensionHelper::CombineToTry>
14336NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14337 SmallVector<CombineToTry> Strategies;
14338 switch (Root->getOpcode()) {
14339 case ISD::ADD:
14340 case ISD::SUB:
14341 case ISD::OR:
14342 case RISCVISD::ADD_VL:
14343 case RISCVISD::SUB_VL:
14344 case RISCVISD::FADD_VL:
14345 case RISCVISD::FSUB_VL:
14346 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14347 Strategies.push_back(canFoldToVWWithSameExtension);
14348 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14349 Strategies.push_back(canFoldToVW_W);
14350 break;
14351 case RISCVISD::FMUL_VL:
14352 Strategies.push_back(canFoldToVWWithSameExtension);
14353 break;
14354 case ISD::MUL:
14355 case RISCVISD::MUL_VL:
14356 // mul -> vwmul(u)
14357 Strategies.push_back(canFoldToVWWithSameExtension);
14358 // mul -> vwmulsu
14359 Strategies.push_back(canFoldToVW_SU);
14360 break;
14361 case ISD::SHL:
14362 case RISCVISD::SHL_VL:
14363 // shl -> vwsll
14364 Strategies.push_back(canFoldToVWWithZEXT);
14365 break;
14366 case RISCVISD::VWADD_W_VL:
14367 case RISCVISD::VWSUB_W_VL:
14368 // vwadd_w|vwsub_w -> vwadd|vwsub
14369 Strategies.push_back(canFoldToVWWithSEXT);
14370 break;
14371 case RISCVISD::VWADDU_W_VL:
14372 case RISCVISD::VWSUBU_W_VL:
14373 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14374 Strategies.push_back(canFoldToVWWithZEXT);
14375 break;
14376 case RISCVISD::VFWADD_W_VL:
14377 case RISCVISD::VFWSUB_W_VL:
14378 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14379 Strategies.push_back(canFoldToVWWithFPEXT);
14380 break;
14381 default:
14382 llvm_unreachable("Unexpected opcode");
14383 }
14384 return Strategies;
14385}
14386} // End anonymous namespace.
14387
14388/// Combine a binary operation to its equivalent VW or VW_W form.
14389/// The supported combines are:
14390/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14391/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14392/// mul | mul_vl -> vwmul(u) | vwmul_su
14393/// shl | shl_vl -> vwsll
14394/// fadd_vl -> vfwadd | vfwadd_w
14395/// fsub_vl -> vfwsub | vfwsub_w
14396/// fmul_vl -> vfwmul
14397/// vwadd_w(u) -> vwadd(u)
14398/// vwsub_w(u) -> vwsub(u)
14399/// vfwadd_w -> vfwadd
14400/// vfwsub_w -> vfwsub
14401static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14402 TargetLowering::DAGCombinerInfo &DCI,
14403 const RISCVSubtarget &Subtarget) {
14404 SelectionDAG &DAG = DCI.DAG;
14405 if (DCI.isBeforeLegalize())
14406 return SDValue();
14407
14408 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14409 return SDValue();
14410
14411 SmallVector<SDNode *> Worklist;
14412 SmallSet<SDNode *, 8> Inserted;
14413 Worklist.push_back(N);
14414 Inserted.insert(N);
14415 SmallVector<CombineResult> CombinesToApply;
14416
14417 while (!Worklist.empty()) {
14418 SDNode *Root = Worklist.pop_back_val();
14419 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14420 return SDValue();
14421
14422 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14423 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14424 auto AppendUsersIfNeeded = [&Worklist,
14425 &Inserted](const NodeExtensionHelper &Op) {
14426 if (Op.needToPromoteOtherUsers()) {
14427 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14428 if (Inserted.insert(TheUse).second)
14429 Worklist.push_back(TheUse);
14430 }
14431 }
14432 };
14433
14434 // Control the compile time by limiting the number of nodes we look at in
14435 // total.
14436 if (Inserted.size() > ExtensionMaxWebSize)
14437 return SDValue();
14438
14439 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14440 NodeExtensionHelper::getSupportedFoldings(N);
14441
14442 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14443 bool Matched = false;
14444 for (int Attempt = 0;
14445 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14446 ++Attempt) {
14447
14448 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14449 FoldingStrategies) {
14450 std::optional<CombineResult> Res =
14451 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14452 if (Res) {
14453 Matched = true;
14454 CombinesToApply.push_back(*Res);
14455 // All the inputs that are extended need to be folded, otherwise
14456 // we would be leaving the old input (since it may still be used),
14457 // and the new one.
14458 if (Res->LHSExt.has_value())
14459 AppendUsersIfNeeded(LHS);
14460 if (Res->RHSExt.has_value())
14461 AppendUsersIfNeeded(RHS);
14462 break;
14463 }
14464 }
14465 std::swap(LHS, RHS);
14466 }
14467 // Right now we do an all-or-nothing approach.
14468 if (!Matched)
14469 return SDValue();
14470 }
14471 // Store the value for the replacement of the input node separately.
14472 SDValue InputRootReplacement;
14473 // We do the RAUW after we materialize all the combines, because some replaced
14474 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14475 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14476 // yet-to-be-visited CombinesToApply roots.
14477 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14478 ValuesToReplace.reserve(CombinesToApply.size());
14479 for (CombineResult Res : CombinesToApply) {
14480 SDValue NewValue = Res.materialize(DAG, Subtarget);
14481 if (!InputRootReplacement) {
14482 assert(Res.Root == N &&
14483 "First element is expected to be the current node");
14484 InputRootReplacement = NewValue;
14485 } else {
14486 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14487 }
14488 }
14489 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14490 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14491 DCI.AddToWorklist(OldNewValues.second.getNode());
14492 }
14493 return InputRootReplacement;
14494}
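// Sketch of the all-or-nothing behaviour above: if a single sext feeds two
// supported adds, e.g.
//   t = sext x ; a = add t, y ; b = add t, z
// then both adds are pushed onto the worklist via AppendUsersIfNeeded, and the
// rewrite only happens if every user of the extension can be widened;
// otherwise nothing is changed, so the narrow and extended values never have
// to coexist.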
14495
14496// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14497// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14498// y will be the Passthru and cond will be the Mask.
14499static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14500 unsigned Opc = N->getOpcode();
14501 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14502 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14503
14504 SDValue Y = N->getOperand(0);
14505 SDValue MergeOp = N->getOperand(1);
14506 unsigned MergeOpc = MergeOp.getOpcode();
14507
14508 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14509 return SDValue();
14510
14511 SDValue X = MergeOp->getOperand(1);
14512
14513 if (!MergeOp.hasOneUse())
14514 return SDValue();
14515
14516 // Passthru should be undef
14517 SDValue Passthru = N->getOperand(2);
14518 if (!Passthru.isUndef())
14519 return SDValue();
14520
14521 // Mask should be all ones
14522 SDValue Mask = N->getOperand(3);
14523 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14524 return SDValue();
14525
14526 // False value of MergeOp should be all zeros
14527 SDValue Z = MergeOp->getOperand(2);
14528
14529 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14530 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14531 Z = Z.getOperand(1);
14532
14533 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14534 return SDValue();
14535
14536 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14537 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14538 N->getFlags());
14539}
14540
14541static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14542 TargetLowering::DAGCombinerInfo &DCI,
14543 const RISCVSubtarget &Subtarget) {
14544 [[maybe_unused]] unsigned Opc = N->getOpcode();
14545 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14546 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14547
14548 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14549 return V;
14550
14551 return combineVWADDSUBWSelect(N, DCI.DAG);
14552}
14553
14554// Helper function for performMemPairCombine.
14555// Try to combine the memory loads/stores LSNode1 and LSNode2
14556// into a single memory pair operation.
14557static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14558 LSBaseSDNode *LSNode2, SDValue BasePtr,
14559 uint64_t Imm) {
14560 SmallPtrSet<const SDNode *, 32> Visited;
14561 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14562
14563 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14564 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14565 return SDValue();
14566
14567 MachineFunction &MF = DAG.getMachineFunction();
14568 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14569
14570 // The new operation has twice the width.
14571 MVT XLenVT = Subtarget.getXLenVT();
14572 EVT MemVT = LSNode1->getMemoryVT();
14573 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14574 MachineMemOperand *MMO = LSNode1->getMemOperand();
14575 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14576 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14577
14578 if (LSNode1->getOpcode() == ISD::LOAD) {
14579 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14580 unsigned Opcode;
14581 if (MemVT == MVT::i32)
14582 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14583 else
14584 Opcode = RISCVISD::TH_LDD;
14585
14586 SDValue Res = DAG.getMemIntrinsicNode(
14587 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14588 {LSNode1->getChain(), BasePtr,
14589 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14590 NewMemVT, NewMMO);
14591
14592 SDValue Node1 =
14593 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14594 SDValue Node2 =
14595 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14596
14597 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14598 return Node1;
14599 } else {
14600 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14601
14602 SDValue Res = DAG.getMemIntrinsicNode(
14603 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14604 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14605 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14606 NewMemVT, NewMMO);
14607
14608 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14609 return Res;
14610 }
14611}
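// E.g. two adjacent i32 loads from Base+8 and Base+12 become one TH_LWD memory
// intrinsic that produces both loaded values plus a chain; the second load's
// users are then rewired to the paired node's second result.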
14612
14613// Try to combine two adjacent loads/stores to a single pair instruction from
14614// the XTHeadMemPair vendor extension.
14615static SDValue performMemPairCombine(SDNode *N,
14616 TargetLowering::DAGCombinerInfo &DCI) {
14617 SelectionDAG &DAG = DCI.DAG;
14618 MachineFunction &MF = DAG.getMachineFunction();
14619 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14620
14621 // Target does not support load/store pair.
14622 if (!Subtarget.hasVendorXTHeadMemPair())
14623 return SDValue();
14624
14625 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14626 EVT MemVT = LSNode1->getMemoryVT();
14627 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14628
14629 // No volatile, indexed or atomic loads/stores.
14630 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14631 return SDValue();
14632
14633 // Function to get a base + constant representation from a memory value.
14634 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14635 if (Ptr->getOpcode() == ISD::ADD)
14636 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14637 return {Ptr->getOperand(0), C1->getZExtValue()};
14638 return {Ptr, 0};
14639 };
14640
14641 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14642
14643 SDValue Chain = N->getOperand(0);
14644 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14645 UI != UE; ++UI) {
14646 SDUse &Use = UI.getUse();
14647 if (Use.getUser() != N && Use.getResNo() == 0 &&
14648 Use.getUser()->getOpcode() == N->getOpcode()) {
14649 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14650
14651 // No volatile, indexed or atomic loads/stores.
14652 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14653 continue;
14654
14655 // Check if LSNode1 and LSNode2 have the same type and extension.
14656 if (LSNode1->getOpcode() == ISD::LOAD)
14657 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14658 cast<LoadSDNode>(LSNode1)->getExtensionType())
14659 continue;
14660
14661 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14662 continue;
14663
14664 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14665
14666 // Check if the base pointer is the same for both instructions.
14667 if (Base1 != Base2)
14668 continue;
14669
14670 // Check if the offsets match the XTHeadMemPair encoding constraints.
14671 bool Valid = false;
14672 if (MemVT == MVT::i32) {
14673 // Check for adjacent i32 values and a 2-bit index.
14674 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14675 Valid = true;
14676 } else if (MemVT == MVT::i64) {
14677 // Check for adjacent i64 values and a 2-bit index.
14678 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14679 Valid = true;
14680 }
14681
14682 if (!Valid)
14683 continue;
14684
14685 // Try to combine.
14686 if (SDValue Res =
14687 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14688 return Res;
14689 }
14690 }
14691
14692 return SDValue();
14693}
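// Worked example of the offset constraint above: for i64 accesses the first
// offset must be a 2-bit value shifted left by 4 (0, 16, 32 or 48) with the
// second offset exactly 8 bytes later; for i32 accesses the allowed first
// offsets are 0, 8, 16 or 24 with a 4-byte gap.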
14694
14695// Fold
14696// (fp_to_int (froundeven X)) -> fcvt X, rne
14697// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14698// (fp_to_int (ffloor X)) -> fcvt X, rdn
14699// (fp_to_int (fceil X)) -> fcvt X, rup
14700// (fp_to_int (fround X)) -> fcvt X, rmm
14701// (fp_to_int (frint X)) -> fcvt X
14702static SDValue performFP_TO_INTCombine(SDNode *N,
14703 TargetLowering::DAGCombinerInfo &DCI,
14704 const RISCVSubtarget &Subtarget) {
14705 SelectionDAG &DAG = DCI.DAG;
14706 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14707 MVT XLenVT = Subtarget.getXLenVT();
14708
14709 SDValue Src = N->getOperand(0);
14710
14711 // Don't do this for strict-fp Src.
14712 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14713 return SDValue();
14714
14715 // Ensure the FP type is legal.
14716 if (!TLI.isTypeLegal(Src.getValueType()))
14717 return SDValue();
14718
14719 // Don't do this for f16 with Zfhmin and not Zfh.
14720 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14721 return SDValue();
14722
14723 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14724 // If the result is invalid, we didn't find a foldable instruction.
14725 if (FRM == RISCVFPRndMode::Invalid)
14726 return SDValue();
14727
14728 SDLoc DL(N);
14729 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14730 EVT VT = N->getValueType(0);
14731
14732 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14733 MVT SrcVT = Src.getSimpleValueType();
14734 MVT SrcContainerVT = SrcVT;
14735 MVT ContainerVT = VT.getSimpleVT();
14736 SDValue XVal = Src.getOperand(0);
14737
14738 // For widening and narrowing conversions we just combine it into a
14739 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14740 // end up getting lowered to their appropriate pseudo instructions based on
14741 // their operand types
14742 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14743 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14744 return SDValue();
14745
14746 // Make fixed-length vectors scalable first
14747 if (SrcVT.isFixedLengthVector()) {
14748 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14749 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14750 ContainerVT =
14751 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14752 }
14753
14754 auto [Mask, VL] =
14755 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14756
14757 SDValue FpToInt;
14758 if (FRM == RISCVFPRndMode::RTZ) {
14759 // Use the dedicated trunc static rounding mode if we're truncating so we
14760 // don't need to generate calls to fsrmi/fsrm
14761 unsigned Opc =
14762 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14763 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14764 } else if (FRM == RISCVFPRndMode::DYN) {
14765 unsigned Opc =
14766 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14767 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14768 } else {
14769 unsigned Opc =
14770 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14771 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14772 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14773 }
14774
14775 // If converted from fixed-length to scalable, convert back
14776 if (VT.isFixedLengthVector())
14777 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14778
14779 return FpToInt;
14780 }
14781
14782 // Only handle XLen or i32 types. Other types narrower than XLen will
14783 // eventually be legalized to XLenVT.
14784 if (VT != MVT::i32 && VT != XLenVT)
14785 return SDValue();
14786
14787 unsigned Opc;
14788 if (VT == XLenVT)
14789 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14790 else
14791 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14792
14793 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14794 DAG.getTargetConstant(FRM, DL, XLenVT));
14795 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14796}
14797
14798// Fold
14799// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14800// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14801// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14802// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14803// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14804// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14805static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14806 TargetLowering::DAGCombinerInfo &DCI,
14807 const RISCVSubtarget &Subtarget) {
14808 SelectionDAG &DAG = DCI.DAG;
14809 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14810 MVT XLenVT = Subtarget.getXLenVT();
14811
14812 // Only handle XLen types. Other types narrower than XLen will eventually be
14813 // legalized to XLenVT.
14814 EVT DstVT = N->getValueType(0);
14815 if (DstVT != XLenVT)
14816 return SDValue();
14817
14818 SDValue Src = N->getOperand(0);
14819
14820 // Don't do this for strict-fp Src.
14821 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14822 return SDValue();
14823
14824 // Ensure the FP type is also legal.
14825 if (!TLI.isTypeLegal(Src.getValueType()))
14826 return SDValue();
14827
14828 // Don't do this for f16 with Zfhmin and not Zfh.
14829 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14830 return SDValue();
14831
14832 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14833
14834 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14835 if (FRM == RISCVFPRndMode::Invalid)
14836 return SDValue();
14837
14838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14839
14840 unsigned Opc;
14841 if (SatVT == DstVT)
14842 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14843 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14844 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14845 else
14846 return SDValue();
14847 // FIXME: Support other SatVTs by clamping before or after the conversion.
14848
14849 Src = Src.getOperand(0);
14850
14851 SDLoc DL(N);
14852 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14853 DAG.getTargetConstant(FRM, DL, XLenVT));
14854
14855 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14856 // extend.
14857 if (Opc == RISCVISD::FCVT_WU_RV64)
14858 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14859
14860 // RISC-V FP-to-int conversions saturate to the destination register size, but
14861 // don't produce 0 for nan.
14862 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14863 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14864}
14865
14866// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14867// smaller than XLenVT.
14868static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14869 const RISCVSubtarget &Subtarget) {
14870 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14871
14872 SDValue Src = N->getOperand(0);
14873 if (Src.getOpcode() != ISD::BSWAP)
14874 return SDValue();
14875
14876 EVT VT = N->getValueType(0);
14877 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14878 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14879 return SDValue();
14880
14881 SDLoc DL(N);
14882 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14883}
14884
14885// Convert from one FMA opcode to another based on whether we are negating the
14886// multiply result and/or the accumulator.
14887// NOTE: Only supports RVV operations with VL.
14888static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14889 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14890 if (NegMul) {
14891 // clang-format off
14892 switch (Opcode) {
14893 default: llvm_unreachable("Unexpected opcode");
14894 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14895 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14896 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14897 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14898 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14899 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14900 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14901 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14902 }
14903 // clang-format on
14904 }
14905
14906 // Negating the accumulator changes ADD<->SUB.
14907 if (NegAcc) {
14908 // clang-format off
14909 switch (Opcode) {
14910 default: llvm_unreachable("Unexpected opcode");
14911 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14912 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14913 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14914 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14915 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14916 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14917 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14918 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14919 }
14920 // clang-format on
14921 }
14922
14923 return Opcode;
14924}
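// E.g. negateFMAOpcode(RISCVISD::VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/true)
// first maps VFMADD_VL to VFNMSUB_VL (negated product) and then to VFNMADD_VL
// (negated accumulator as well), i.e. (a * b) + c becomes -(a * b) - c.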
14925
14926static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14927 // Fold FNEG_VL into FMA opcodes.
14928 // The first operand of strict-fp is chain.
14929 unsigned Offset = N->isTargetStrictFPOpcode();
14930 SDValue A = N->getOperand(0 + Offset);
14931 SDValue B = N->getOperand(1 + Offset);
14932 SDValue C = N->getOperand(2 + Offset);
14933 SDValue Mask = N->getOperand(3 + Offset);
14934 SDValue VL = N->getOperand(4 + Offset);
14935
14936 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14937 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14938 V.getOperand(2) == VL) {
14939 // Return the negated input.
14940 V = V.getOperand(0);
14941 return true;
14942 }
14943
14944 return false;
14945 };
14946
14947 bool NegA = invertIfNegative(A);
14948 bool NegB = invertIfNegative(B);
14949 bool NegC = invertIfNegative(C);
14950
14951 // If no operands are negated, we're done.
14952 if (!NegA && !NegB && !NegC)
14953 return SDValue();
14954
14955 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14956 if (N->isTargetStrictFPOpcode())
14957 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14958 {N->getOperand(0), A, B, C, Mask, VL});
14959 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14960 VL);
14961}
14962
14963static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14964 const RISCVSubtarget &Subtarget) {
14965 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14966 return V;
14967
14968 if (N->getValueType(0).isScalableVector() &&
14969 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14970 (Subtarget.hasVInstructionsF16Minimal() &&
14971 !Subtarget.hasVInstructionsF16())) {
14972 return SDValue();
14973 }
14974
14975 // FIXME: Ignore strict opcodes for now.
14976 if (N->isTargetStrictFPOpcode())
14977 return SDValue();
14978
14979 // Try to form widening FMA.
14980 SDValue Op0 = N->getOperand(0);
14981 SDValue Op1 = N->getOperand(1);
14982 SDValue Mask = N->getOperand(3);
14983 SDValue VL = N->getOperand(4);
14984
14985 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14986 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14987 return SDValue();
14988
14989 // TODO: Refactor to handle more complex cases similar to
14990 // combineBinOp_VLToVWBinOp_VL.
14991 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14992 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14993 return SDValue();
14994
14995 // Check the mask and VL are the same.
14996 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14997 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
14998 return SDValue();
14999
15000 unsigned NewOpc;
15001 switch (N->getOpcode()) {
15002 default:
15003 llvm_unreachable("Unexpected opcode");
15004 case RISCVISD::VFMADD_VL:
15005 NewOpc = RISCVISD::VFWMADD_VL;
15006 break;
15007 case RISCVISD::VFNMSUB_VL:
15008 NewOpc = RISCVISD::VFWNMSUB_VL;
15009 break;
15010 case RISCVISD::VFNMADD_VL:
15011 NewOpc = RISCVISD::VFWNMADD_VL;
15012 break;
15013 case RISCVISD::VFMSUB_VL:
15014 NewOpc = RISCVISD::VFWMSUB_VL;
15015 break;
15016 }
15017
15018 Op0 = Op0.getOperand(0);
15019 Op1 = Op1.getOperand(0);
15020
15021 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15022 N->getOperand(2), Mask, VL);
15023}
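// E.g. (vfmadd_vl (fp_extend_vl a, Mask, VL), (fp_extend_vl b, Mask, VL), c,
// Mask, VL) becomes (vfwmadd_vl a, b, c, Mask, VL), folding both single-use
// fp_extends into the widening FMA.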
15024
15025static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15026 const RISCVSubtarget &Subtarget) {
15027 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15028
15029 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15030 return SDValue();
15031
15032 if (!isa<ConstantSDNode>(N->getOperand(1)))
15033 return SDValue();
15034 uint64_t ShAmt = N->getConstantOperandVal(1);
15035 if (ShAmt > 32)
15036 return SDValue();
15037
15038 SDValue N0 = N->getOperand(0);
15039
15040 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15041 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15042 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15043 if (ShAmt < 32 &&
15044 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15045 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15046 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15047 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15048 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15049 if (LShAmt < 32) {
15050 SDLoc ShlDL(N0.getOperand(0));
15051 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15052 N0.getOperand(0).getOperand(0),
15053 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15054 SDLoc DL(N);
15055 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15056 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15057 }
15058 }
15059
15060 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15061 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15062 //
15063 // Also try these folds where an add or sub is in the middle.
15064 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15065 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15066 SDValue Shl;
15067 ConstantSDNode *AddC = nullptr;
15068
15069 // We might have an ADD or SUB between the SRA and SHL.
15070 bool IsAdd = N0.getOpcode() == ISD::ADD;
15071 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15072 // Other operand needs to be a constant we can modify.
15073 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15074 if (!AddC)
15075 return SDValue();
15076
15077 // AddC needs to have at least 32 trailing zeros.
15078 if (AddC->getAPIntValue().countr_zero() < 32)
15079 return SDValue();
15080
15081 // All users should be a shift by constant less than or equal to 32. This
15082 // ensures we'll do this optimization for each of them to produce an
15083 // add/sub+sext_inreg they can all share.
15084 for (SDNode *U : N0->uses()) {
15085 if (U->getOpcode() != ISD::SRA ||
15086 !isa<ConstantSDNode>(U->getOperand(1)) ||
15087 U->getConstantOperandVal(1) > 32)
15088 return SDValue();
15089 }
15090
15091 Shl = N0.getOperand(IsAdd ? 0 : 1);
15092 } else {
15093 // Not an ADD or SUB.
15094 Shl = N0;
15095 }
15096
15097 // Look for a shift left by 32.
15098 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15099 Shl.getConstantOperandVal(1) != 32)
15100 return SDValue();
15101
15102 // If we didn't look through an add/sub, then the shl should have one use.
15103 // If we did look through an add/sub, the sext_inreg we create is free so
15104 // we're only creating 2 new instructions. It's enough to only remove the
15105 // original sra+add/sub.
15106 if (!AddC && !Shl.hasOneUse())
15107 return SDValue();
15108
15109 SDLoc DL(N);
15110 SDValue In = Shl.getOperand(0);
15111
15112 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15113 // constant.
15114 if (AddC) {
15115 SDValue ShiftedAddC =
15116 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15117 if (IsAdd)
15118 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15119 else
15120 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15121 }
15122
15123 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15124 DAG.getValueType(MVT::i32));
15125 if (ShAmt == 32)
15126 return SExt;
15127
15128 return DAG.getNode(
15129 ISD::SHL, DL, MVT::i64, SExt,
15130 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15131}
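// Worked example for the (sra (shl X, 32), 32 - C) fold above: with
// ShAmt == 27 we get C == 5, so the result is
//   (shl (sext_inreg X, i32), 5)
// and when ShAmt == 32 the sext_inreg alone is returned.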
15132
15133// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15134// the result is used as the condition of a br_cc or select_cc we can invert,
15135// inverting the setcc is free, and Z is 0/1. Caller will invert the
15136// br_cc/select_cc.
15137static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15138 bool IsAnd = Cond.getOpcode() == ISD::AND;
15139 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15140 return SDValue();
15141
15142 if (!Cond.hasOneUse())
15143 return SDValue();
15144
15145 SDValue Setcc = Cond.getOperand(0);
15146 SDValue Xor = Cond.getOperand(1);
15147 // Canonicalize setcc to LHS.
15148 if (Setcc.getOpcode() != ISD::SETCC)
15149 std::swap(Setcc, Xor);
15150 // LHS should be a setcc and RHS should be an xor.
15151 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15152 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15153 return SDValue();
15154
15155 // If the condition is an And, SimplifyDemandedBits may have changed
15156 // (xor Z, 1) to (not Z).
15157 SDValue Xor1 = Xor.getOperand(1);
15158 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15159 return SDValue();
15160
15161 EVT VT = Cond.getValueType();
15162 SDValue Xor0 = Xor.getOperand(0);
15163
15164 // The LHS of the xor needs to be 0/1.
15165 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15166 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15167 return SDValue();
15168
15169 // We can only invert integer setccs.
15170 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15171 if (!SetCCOpVT.isScalarInteger())
15172 return SDValue();
15173
15174 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15175 if (ISD::isIntEqualitySetCC(CCVal)) {
15176 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15177 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15178 Setcc.getOperand(1), CCVal);
15179 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15180 // Invert (setlt 0, X) by converting to (setlt X, 1).
15181 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15182 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15183 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15184 // Invert (setlt X, 1) by converting to (setlt 0, X).
15185 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15186 DAG.getConstant(0, SDLoc(Setcc), VT),
15187 Setcc.getOperand(0), CCVal);
15188 } else
15189 return SDValue();
15190
15191 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15192 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15193}
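// E.g. (and (setcc X, Y, eq), (xor Z, 1)) with Z known to be 0 or 1 becomes
// (or (setcc X, Y, ne), Z); the caller is then responsible for inverting the
// br_cc/select_cc that consumed the original condition.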
15194
15195// Perform common combines for BR_CC and SELECT_CC conditions.
15196static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15197 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15198 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15199
15200 // Since an arithmetic right shift always preserves the sign,
15201 // the shift can be omitted.
15202 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15203 // setge (sra X, N), 0 -> setge X, 0
15204 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15205 LHS.getOpcode() == ISD::SRA) {
15206 LHS = LHS.getOperand(0);
15207 return true;
15208 }
15209
15210 if (!ISD::isIntEqualitySetCC(CCVal))
15211 return false;
15212
15213 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15214 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15215 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15216 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15217 // If we're looking for eq 0 instead of ne 0, we need to invert the
15218 // condition.
15219 bool Invert = CCVal == ISD::SETEQ;
15220 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15221 if (Invert)
15222 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15223
15224 RHS = LHS.getOperand(1);
15225 LHS = LHS.getOperand(0);
15226 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15227
15228 CC = DAG.getCondCode(CCVal);
15229 return true;
15230 }
15231
15232 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15233 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15234 RHS = LHS.getOperand(1);
15235 LHS = LHS.getOperand(0);
15236 return true;
15237 }
15238
15239 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15240 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15241 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15242 SDValue LHS0 = LHS.getOperand(0);
15243 if (LHS0.getOpcode() == ISD::AND &&
15244 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15245 uint64_t Mask = LHS0.getConstantOperandVal(1);
15246 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15247 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15248 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15249 CC = DAG.getCondCode(CCVal);
15250
15251 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15252 LHS = LHS0.getOperand(0);
15253 if (ShAmt != 0)
15254 LHS =
15255 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15256 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15257 return true;
15258 }
15259 }
15260 }
15261
15262 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15263 // This can occur when legalizing some floating point comparisons.
15264 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15265 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15266 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15267 CC = DAG.getCondCode(CCVal);
15268 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15269 return true;
15270 }
15271
15272 if (isNullConstant(RHS)) {
15273 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15274 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15275 CC = DAG.getCondCode(CCVal);
15276 LHS = NewCond;
15277 return true;
15278 }
15279 }
15280
15281 return false;
15282}
15283
15284// Fold
15285// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15286// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15287// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15288// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15289static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15290 SDValue TrueVal, SDValue FalseVal,
15291 bool Swapped) {
15292 bool Commutative = true;
15293 unsigned Opc = TrueVal.getOpcode();
15294 switch (Opc) {
15295 default:
15296 return SDValue();
15297 case ISD::SHL:
15298 case ISD::SRA:
15299 case ISD::SRL:
15300 case ISD::SUB:
15301 Commutative = false;
15302 break;
15303 case ISD::ADD:
15304 case ISD::OR:
15305 case ISD::XOR:
15306 break;
15307 }
15308
15309 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15310 return SDValue();
15311
15312 unsigned OpToFold;
15313 if (FalseVal == TrueVal.getOperand(0))
15314 OpToFold = 0;
15315 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15316 OpToFold = 1;
15317 else
15318 return SDValue();
15319
15320 EVT VT = N->getValueType(0);
15321 SDLoc DL(N);
15322 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15323 EVT OtherOpVT = OtherOp->getValueType(0);
15324 SDValue IdentityOperand =
15325 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15326 if (!Commutative)
15327 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15328 assert(IdentityOperand && "No identity operand!");
15329
15330 if (Swapped)
15331 std::swap(OtherOp, IdentityOperand);
15332 SDValue NewSel =
15333 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15334 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15335}
15336
15337// This tries to get rid of `select` and `icmp` that are being used to handle
15338// targets that do not support `cttz(0)`/`ctlz(0)`.
15339static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15340 SDValue Cond = N->getOperand(0);
15341
15342 // This represents either CTTZ or CTLZ instruction.
15343 SDValue CountZeroes;
15344
15345 SDValue ValOnZero;
15346
15347 if (Cond.getOpcode() != ISD::SETCC)
15348 return SDValue();
15349
15350 if (!isNullConstant(Cond->getOperand(1)))
15351 return SDValue();
15352
15353 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15354 if (CCVal == ISD::CondCode::SETEQ) {
15355 CountZeroes = N->getOperand(2);
15356 ValOnZero = N->getOperand(1);
15357 } else if (CCVal == ISD::CondCode::SETNE) {
15358 CountZeroes = N->getOperand(1);
15359 ValOnZero = N->getOperand(2);
15360 } else {
15361 return SDValue();
15362 }
15363
15364 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15365 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15366 CountZeroes = CountZeroes.getOperand(0);
15367
15368 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15369 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15370 CountZeroes.getOpcode() != ISD::CTLZ &&
15371 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15372 return SDValue();
15373
15374 if (!isNullConstant(ValOnZero))
15375 return SDValue();
15376
15377 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15378 if (Cond->getOperand(0) != CountZeroesArgument)
15379 return SDValue();
15380
15381 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15382 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15383 CountZeroes.getValueType(), CountZeroesArgument);
15384 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15385 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15386 CountZeroes.getValueType(), CountZeroesArgument);
15387 }
15388
15389 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15390 SDValue BitWidthMinusOne =
15391 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15392
15393 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15394 CountZeroes, BitWidthMinusOne);
15395 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15396}
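// E.g. (select (seteq X, 0), 0, (cttz_zero_undef X)) becomes
// (and (cttz X), BitWidth - 1): cttz of zero yields BitWidth, and the mask
// folds that back to 0, so the explicit zero check disappears.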
15397
15398static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15399 const RISCVSubtarget &Subtarget) {
15400 SDValue Cond = N->getOperand(0);
15401 SDValue True = N->getOperand(1);
15402 SDValue False = N->getOperand(2);
15403 SDLoc DL(N);
15404 EVT VT = N->getValueType(0);
15405 EVT CondVT = Cond.getValueType();
15406
15407 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15408 return SDValue();
15409
15410 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15411 // BEXTI, where C is power of 2.
15412 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15413 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15414 SDValue LHS = Cond.getOperand(0);
15415 SDValue RHS = Cond.getOperand(1);
15416 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15417 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15418 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15419 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15420 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15421 return DAG.getSelect(DL, VT,
15422 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15423 False, True);
15424 }
15425 }
15426 return SDValue();
15427}
15428
15429static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15430 const RISCVSubtarget &Subtarget) {
15431 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15432 return Folded;
15433
15434 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15435 return V;
15436
15437 if (Subtarget.hasConditionalMoveFusion())
15438 return SDValue();
15439
15440 SDValue TrueVal = N->getOperand(1);
15441 SDValue FalseVal = N->getOperand(2);
15442 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15443 return V;
15444 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15445}
15446
15447/// If we have a build_vector where each lane is binop X, C, where C
15448/// is a constant (but not necessarily the same constant on all lanes),
15449/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15450/// We assume that materializing a constant build vector will be no more
15451/// expensive than performing O(n) binops.
15452static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15453 const RISCVSubtarget &Subtarget,
15454 const RISCVTargetLowering &TLI) {
15455 SDLoc DL(N);
15456 EVT VT = N->getValueType(0);
15457
15458 assert(!VT.isScalableVector() && "unexpected build vector");
15459
15460 if (VT.getVectorNumElements() == 1)
15461 return SDValue();
15462
15463 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15464 if (!TLI.isBinOp(Opcode))
15465 return SDValue();
15466
15467 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15468 return SDValue();
15469
15470 // This BUILD_VECTOR involves an implicit truncation, and sinking
15471 // truncates through binops is non-trivial.
15472 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15473 return SDValue();
15474
15475 SmallVector<SDValue> LHSOps;
15476 SmallVector<SDValue> RHSOps;
15477 for (SDValue Op : N->ops()) {
15478 if (Op.isUndef()) {
15479 // We can't form a divide or remainder from undef.
15480 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15481 return SDValue();
15482
15483 LHSOps.push_back(Op);
15484 RHSOps.push_back(Op);
15485 continue;
15486 }
15487
15488 // TODO: We can handle operations which have a neutral rhs value
15489 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15490 // of profit in a more explicit manner.
15491 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15492 return SDValue();
15493
15494 LHSOps.push_back(Op.getOperand(0));
15495 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15496 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15497 return SDValue();
15498 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15499 // have different LHS and RHS types.
15500 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15501 return SDValue();
15502
15503 RHSOps.push_back(Op.getOperand(1));
15504 }
15505
15506 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15507 DAG.getBuildVector(VT, DL, RHSOps));
15508}
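// E.g. build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4) becomes
// (add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)), trading the
// per-lane scalar adds for one vector add plus a materialized constant vector.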
15509
15510static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15511 const RISCVSubtarget &Subtarget,
15512 const RISCVTargetLowering &TLI) {
15513 SDValue InVec = N->getOperand(0);
15514 SDValue InVal = N->getOperand(1);
15515 SDValue EltNo = N->getOperand(2);
15516 SDLoc DL(N);
15517
15518 EVT VT = InVec.getValueType();
15519 if (VT.isScalableVector())
15520 return SDValue();
15521
15522 if (!InVec.hasOneUse())
15523 return SDValue();
15524
15525 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15526 // move the insert_vector_elts into the arms of the binop. Note that
15527 // the new RHS must be a constant.
15528 const unsigned InVecOpcode = InVec->getOpcode();
15529 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15530 InVal.hasOneUse()) {
15531 SDValue InVecLHS = InVec->getOperand(0);
15532 SDValue InVecRHS = InVec->getOperand(1);
15533 SDValue InValLHS = InVal->getOperand(0);
15534 SDValue InValRHS = InVal->getOperand(1);
15535
15536 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
15537 return SDValue();
15538 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15539 return SDValue();
15540 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15541 // have different LHS and RHS types.
15542 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15543 return SDValue();
15544 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15545 InVecLHS, InValLHS, EltNo);
15546 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15547 InVecRHS, InValRHS, EltNo);
15548 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15549 }
15550
15551 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15552 // move the insert_vector_elt to the source operand of the concat_vector.
15553 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15554 return SDValue();
15555
15556 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15557 if (!IndexC)
15558 return SDValue();
15559 unsigned Elt = IndexC->getZExtValue();
15560
15561 EVT ConcatVT = InVec.getOperand(0).getValueType();
15562 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15563 return SDValue();
15564 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15565 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15566
15567 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15568 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15569 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15570 ConcatOp, InVal, NewIdx);
15571
15572 SmallVector<SDValue> ConcatOps;
15573 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15574 ConcatOps[ConcatOpIdx] = ConcatOp;
15575 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15576}
15577
15578// If we're concatenating a series of vector loads like
15579// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15580// Then we can turn this into a strided load by widening the vector elements
15581// vlse32 p, stride=n
15582static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15583 const RISCVSubtarget &Subtarget,
15584 const RISCVTargetLowering &TLI) {
15585 SDLoc DL(N);
15586 EVT VT = N->getValueType(0);
15587
15588 // Only perform this combine on legal MVTs.
15589 if (!TLI.isTypeLegal(VT))
15590 return SDValue();
15591
15592 // TODO: Potentially extend this to scalable vectors
15593 if (VT.isScalableVector())
15594 return SDValue();
15595
15596 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15597 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15598 !SDValue(BaseLd, 0).hasOneUse())
15599 return SDValue();
15600
15601 EVT BaseLdVT = BaseLd->getValueType(0);
15602
15603 // Go through the loads and check that they're strided
15604 SmallVector<LoadSDNode *> Lds;
15605 Lds.push_back(BaseLd);
15606 Align Align = BaseLd->getAlign();
15607 for (SDValue Op : N->ops().drop_front()) {
15608 auto *Ld = dyn_cast<LoadSDNode>(Op);
15609 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15610 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15611 Ld->getValueType(0) != BaseLdVT)
15612 return SDValue();
15613
15614 Lds.push_back(Ld);
15615
15616 // The common alignment is the most restrictive (smallest) of all the loads
15617 Align = std::min(Align, Ld->getAlign());
15618 }
15619
15620 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15621 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15622 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15623 // If the load ptrs can be decomposed into a common (Base + Index) with a
15624 // common constant stride, then return the constant stride.
15625 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15626 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15627 if (BIO1.equalBaseIndex(BIO2, DAG))
15628 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15629
15630 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15631 SDValue P1 = Ld1->getBasePtr();
15632 SDValue P2 = Ld2->getBasePtr();
15633 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15634 return {{P2.getOperand(1), false}};
15635 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15636 return {{P1.getOperand(1), true}};
15637
15638 return std::nullopt;
15639 };
15640
15641 // Get the distance between the first and second loads
15642 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15643 if (!BaseDiff)
15644 return SDValue();
15645
15646 // Check all the loads are the same distance apart
15647 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15648 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15649 return SDValue();
15650
15651 // TODO: At this point, we've successfully matched a generalized gather
15652 // load. Maybe we should emit that, and then move the specialized
15653 // matchers above and below into a DAG combine?
15654
15655 // Get the widened scalar type, e.g. v4i8 -> i32
15656 unsigned WideScalarBitWidth =
15657 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15658 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15659
15660 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15661 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15662 if (!TLI.isTypeLegal(WideVecVT))
15663 return SDValue();
15664
15665 // Check that the operation is legal
15666 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15667 return SDValue();
15668
15669 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15670 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15671 ? std::get<SDValue>(StrideVariant)
15672 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15673 Lds[0]->getOffset().getValueType());
15674 if (MustNegateStride)
15675 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15676
15677 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15678 SDValue IntID =
15679 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15680 Subtarget.getXLenVT());
15681
15682 SDValue AllOneMask =
15683 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15684 DAG.getConstant(1, DL, MVT::i1));
15685
15686 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15687 BaseLd->getBasePtr(), Stride, AllOneMask};
15688
15689 uint64_t MemSize;
15690 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15691 ConstStride && ConstStride->getSExtValue() >= 0)
15692 // total size = (elsize * n) + (stride - elsize) * (n-1)
15693 // = elsize + stride * (n-1)
15694 MemSize = WideScalarVT.getSizeInBits() +
15695 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15696 else
15697 // If Stride isn't constant, then we can't know how much it will load
15698 MemSize = MemoryLocation::UnknownSize;
15699
15700 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15701 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15702 Align);
15703
15704 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15705 Ops, WideVecVT, MMO);
15706 for (SDValue Ld : N->ops())
15707 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15708
15709 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15710}
15711
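// Fold an add of a widening multiply into a widening multiply-accumulate.
// Illustrative sketch of the pattern matched below:
//   (add_vl x, (vwmul_vl a, b, undef, mask, vl), undef, mask, vl)
//     -> (vwmacc_vl a, b, x, mask, vl)
// and similarly for the vwmulu/vwmulsu variants.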
15712 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15713 const RISCVSubtarget &Subtarget) {
15714
15715 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15716
15717 if (N->getValueType(0).isFixedLengthVector())
15718 return SDValue();
15719
15720 SDValue Addend = N->getOperand(0);
15721 SDValue MulOp = N->getOperand(1);
15722
15723 if (N->getOpcode() == RISCVISD::ADD_VL) {
15724 SDValue AddMergeOp = N->getOperand(2);
15725 if (!AddMergeOp.isUndef())
15726 return SDValue();
15727 }
15728
15729 auto IsVWMulOpc = [](unsigned Opc) {
15730 switch (Opc) {
15731 case RISCVISD::VWMUL_VL:
15732 case RISCVISD::VWMULSU_VL:
15733 case RISCVISD::VWMULU_VL:
15734 return true;
15735 default:
15736 return false;
15737 }
15738 };
15739
15740 if (!IsVWMulOpc(MulOp.getOpcode()))
15741 std::swap(Addend, MulOp);
15742
15743 if (!IsVWMulOpc(MulOp.getOpcode()))
15744 return SDValue();
15745
15746 SDValue MulMergeOp = MulOp.getOperand(2);
15747
15748 if (!MulMergeOp.isUndef())
15749 return SDValue();
15750
15751 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15752 const RISCVSubtarget &Subtarget) {
15753 if (N->getOpcode() == ISD::ADD) {
15754 SDLoc DL(N);
15755 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15756 Subtarget);
15757 }
15758 return std::make_pair(N->getOperand(3), N->getOperand(4));
15759 }(N, DAG, Subtarget);
15760
15761 SDValue MulMask = MulOp.getOperand(3);
15762 SDValue MulVL = MulOp.getOperand(4);
15763
15764 if (AddMask != MulMask || AddVL != MulVL)
15765 return SDValue();
15766
15767 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15768 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15769 "Unexpected opcode after VWMACC_VL");
15770 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15771 "Unexpected opcode after VWMACC_VL!");
15772 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15773 "Unexpected opcode after VWMUL_VL!");
15774 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15775 "Unexpected opcode after VWMUL_VL!");
15776
15777 SDLoc DL(N);
15778 EVT VT = N->getValueType(0);
15779 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15780 AddVL};
15781 return DAG.getNode(Opc, DL, VT, Ops);
15782}
15783
15784 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15785 ISD::MemIndexType &IndexType,
15786 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15787 if (!DCI.isBeforeLegalize())
15788 return false;
15789
15790 SelectionDAG &DAG = DCI.DAG;
15791 const MVT XLenVT =
15792 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15793
15794 const EVT IndexVT = Index.getValueType();
15795
15796 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15797 // mode, so anything else must be manually legalized.
15798 if (!isIndexTypeSigned(IndexType))
15799 return false;
15800
15801 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15802 // Any index legalization should first promote to XLenVT, so we don't lose
15803 // bits when scaling. This may create an illegal index type so we let
15804 // LLVM's legalization take care of the splitting.
15805 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15806 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15807 IndexVT.changeVectorElementType(XLenVT), Index);
15808 }
15809 IndexType = ISD::UNSIGNED_SCALED;
15810 return true;
15811}
15812
15813/// Match the index vector of a scatter or gather node as the shuffle mask
15814/// which performs the rearrangement if possible. Will only match if
15815/// all lanes are touched, and thus replacing the scatter or gather with
15816/// a unit strided access and shuffle is legal.
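/// For example (illustrative): a gather of v4i32 with byte-offset index
/// <4, 0, 12, 8> touches every lane exactly once, so it can be rewritten as a
/// unit-strided load followed by the shuffle mask <1, 0, 3, 2>.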
15817 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15818 SmallVector<int> &ShuffleMask) {
15819 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15820 return false;
15821 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
15822 return false;
15823
15824 const unsigned ElementSize = VT.getScalarStoreSize();
15825 const unsigned NumElems = VT.getVectorNumElements();
15826
15827 // Create the shuffle mask and check all bits active
15828 assert(ShuffleMask.empty());
15829 BitVector ActiveLanes(NumElems);
15830 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15831 // TODO: We've found an active bit of UB, and could be
15832 // more aggressive here if desired.
15833 if (Index->getOperand(i)->isUndef())
15834 return false;
15835 uint64_t C = Index->getConstantOperandVal(i);
15836 if (C % ElementSize != 0)
15837 return false;
15838 C = C / ElementSize;
15839 if (C >= NumElems)
15840 return false;
15841 ShuffleMask.push_back(C);
15842 ActiveLanes.set(C);
15843 }
15844 return ActiveLanes.all();
15845}
15846
15847/// Match the index of a gather or scatter operation as an operation
15848/// with twice the element width and half the number of elements. This is
15849/// generally profitable (if legal) because these operations are linear
15850 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15851/// come out ahead.
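/// For example (illustrative, assuming the wider element type is legal and the
/// base is sufficiently aligned): a gather of v4i32 with byte-offset index
/// <0, 4, 16, 20> pairs up into a gather of v2i64 with index <0, 16>.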
15852 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15853 Align BaseAlign, const RISCVSubtarget &ST) {
15854 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15855 return false;
15856 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
15857 return false;
15858
15859 // Attempt a doubling. If we can use an element type 4x or 8x in
15860 // size, this will happen via multiple iterations of the transform.
15861 const unsigned NumElems = VT.getVectorNumElements();
15862 if (NumElems % 2 != 0)
15863 return false;
15864
15865 const unsigned ElementSize = VT.getScalarStoreSize();
15866 const unsigned WiderElementSize = ElementSize * 2;
15867 if (WiderElementSize > ST.getELen()/8)
15868 return false;
15869
15870 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15871 return false;
15872
15873 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15874 // TODO: We've found an active bit of UB, and could be
15875 // more aggressive here if desired.
15876 if (Index->getOperand(i)->isUndef())
15877 return false;
15878 // TODO: This offset check is too strict if we support fully
15879 // misaligned memory operations.
15880 uint64_t C = Index->getConstantOperandVal(i);
15881 if (i % 2 == 0) {
15882 if (C % WiderElementSize != 0)
15883 return false;
15884 continue;
15885 }
15886 uint64_t Last = Index->getConstantOperandVal(i-1);
15887 if (C != Last + ElementSize)
15888 return false;
15889 }
15890 return true;
15891}
15892
15893
15894 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15895 DAGCombinerInfo &DCI) const {
15896 SelectionDAG &DAG = DCI.DAG;
15897 const MVT XLenVT = Subtarget.getXLenVT();
15898 SDLoc DL(N);
15899
15900 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15901 // bits are demanded. N will be added to the Worklist if it was not deleted.
15902 // Caller should return SDValue(N, 0) if this returns true.
15903 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15904 SDValue Op = N->getOperand(OpNo);
15905 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15906 if (!SimplifyDemandedBits(Op, Mask, DCI))
15907 return false;
15908
15909 if (N->getOpcode() != ISD::DELETED_NODE)
15910 DCI.AddToWorklist(N);
15911 return true;
15912 };
15913
15914 switch (N->getOpcode()) {
15915 default:
15916 break;
15917 case RISCVISD::SplitF64: {
15918 SDValue Op0 = N->getOperand(0);
15919 // If the input to SplitF64 is just BuildPairF64 then the operation is
15920 // redundant. Instead, use BuildPairF64's operands directly.
15921 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15922 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15923
15924 if (Op0->isUndef()) {
15925 SDValue Lo = DAG.getUNDEF(MVT::i32);
15926 SDValue Hi = DAG.getUNDEF(MVT::i32);
15927 return DCI.CombineTo(N, Lo, Hi);
15928 }
15929
15930 // It's cheaper to materialise two 32-bit integers than to load a double
15931 // from the constant pool and transfer it to integer registers through the
15932 // stack.
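// For example (illustrative): splitting the f64 constant 1.0 yields
// Lo = 0x00000000 and Hi = 0x3FF00000, each of which is cheap to materialise.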
15933 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15934 APInt V = C->getValueAPF().bitcastToAPInt();
15935 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15936 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15937 return DCI.CombineTo(N, Lo, Hi);
15938 }
15939
15940 // This is a target-specific version of a DAGCombine performed in
15941 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15942 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15943 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15944 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15945 !Op0.getNode()->hasOneUse())
15946 break;
15947 SDValue NewSplitF64 =
15948 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15949 Op0.getOperand(0));
15950 SDValue Lo = NewSplitF64.getValue(0);
15951 SDValue Hi = NewSplitF64.getValue(1);
15952 APInt SignBit = APInt::getSignMask(32);
15953 if (Op0.getOpcode() == ISD::FNEG) {
15954 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15955 DAG.getConstant(SignBit, DL, MVT::i32));
15956 return DCI.CombineTo(N, Lo, NewHi);
15957 }
15958 assert(Op0.getOpcode() == ISD::FABS);
15959 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15960 DAG.getConstant(~SignBit, DL, MVT::i32));
15961 return DCI.CombineTo(N, Lo, NewHi);
15962 }
15963 case RISCVISD::SLLW:
15964 case RISCVISD::SRAW:
15965 case RISCVISD::SRLW:
15966 case RISCVISD::RORW:
15967 case RISCVISD::ROLW: {
15968 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15969 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15970 SimplifyDemandedLowBitsHelper(1, 5))
15971 return SDValue(N, 0);
15972
15973 break;
15974 }
15975 case RISCVISD::CLZW:
15976 case RISCVISD::CTZW: {
15977 // Only the lower 32 bits of the first operand are read
15978 if (SimplifyDemandedLowBitsHelper(0, 32))
15979 return SDValue(N, 0);
15980 break;
15981 }
15982 case RISCVISD::FMV_W_X_RV64: {
15983 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15984 // conversion is unnecessary and can be replaced with the
15985 // FMV_X_ANYEXTW_RV64 operand.
15986 SDValue Op0 = N->getOperand(0);
15987 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15988 return Op0.getOperand(0);
15989 break;
15990 }
15991 case RISCVISD::FMV_X_ANYEXTH:
15992 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15993 SDLoc DL(N);
15994 SDValue Op0 = N->getOperand(0);
15995 MVT VT = N->getSimpleValueType(0);
15996 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15997 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15998 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15999 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16000 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16001 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16002 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16003 assert(Op0.getOperand(0).getValueType() == VT &&
16004 "Unexpected value type!");
16005 return Op0.getOperand(0);
16006 }
16007
16008 // This is a target-specific version of a DAGCombine performed in
16009 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16010 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16011 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16012 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16013 !Op0.getNode()->hasOneUse())
16014 break;
16015 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16016 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16017 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16018 if (Op0.getOpcode() == ISD::FNEG)
16019 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16020 DAG.getConstant(SignBit, DL, VT));
16021
16022 assert(Op0.getOpcode() == ISD::FABS);
16023 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16024 DAG.getConstant(~SignBit, DL, VT));
16025 }
16026 case ISD::ABS: {
16027 EVT VT = N->getValueType(0);
16028 SDValue N0 = N->getOperand(0);
16029 // abs (sext) -> zext (abs)
16030 // abs (zext) -> zext (handled elsewhere)
16031 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16032 SDValue Src = N0.getOperand(0);
16033 SDLoc DL(N);
16034 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16035 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16036 }
16037 break;
16038 }
16039 case ISD::ADD: {
16040 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16041 return V;
16042 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16043 return V;
16044 return performADDCombine(N, DAG, Subtarget);
16045 }
16046 case ISD::SUB: {
16047 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16048 return V;
16049 return performSUBCombine(N, DAG, Subtarget);
16050 }
16051 case ISD::AND:
16052 return performANDCombine(N, DCI, Subtarget);
16053 case ISD::OR: {
16054 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16055 return V;
16056 return performORCombine(N, DCI, Subtarget);
16057 }
16058 case ISD::XOR:
16059 return performXORCombine(N, DAG, Subtarget);
16060 case ISD::MUL:
16061 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16062 return V;
16063 return performMULCombine(N, DAG, DCI, Subtarget);
16064 case ISD::SDIV:
16065 case ISD::UDIV:
16066 case ISD::SREM:
16067 case ISD::UREM:
16068 if (SDValue V = combineBinOpOfZExt(N, DAG))
16069 return V;
16070 break;
16071 case ISD::FADD:
16072 case ISD::UMAX:
16073 case ISD::UMIN:
16074 case ISD::SMAX:
16075 case ISD::SMIN:
16076 case ISD::FMAXNUM:
16077 case ISD::FMINNUM: {
16078 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16079 return V;
16080 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16081 return V;
16082 return SDValue();
16083 }
16084 case ISD::SETCC:
16085 return performSETCCCombine(N, DAG, Subtarget);
16086 case ISD::SIGN_EXTEND_INREG:
16087 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16088 case ISD::ZERO_EXTEND:
16089 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16090 // type legalization. This is safe because fp_to_uint produces poison if
16091 // it overflows.
16092 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16093 SDValue Src = N->getOperand(0);
16094 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16095 isTypeLegal(Src.getOperand(0).getValueType()))
16096 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16097 Src.getOperand(0));
16098 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16099 isTypeLegal(Src.getOperand(1).getValueType())) {
16100 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16101 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16102 Src.getOperand(0), Src.getOperand(1));
16103 DCI.CombineTo(N, Res);
16104 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16105 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16106 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16107 }
16108 }
16109 return SDValue();
16110 case RISCVISD::TRUNCATE_VECTOR_VL: {
16111 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16112 // This benefits cases where X and Y are both low-precision vectors of the
16113 // same value type. Since the truncate would be lowered into n levels of
16114 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
16115 // such a pattern would be expanded into a series of "vsetvli"
16116 // and "vnsrl" instructions later to reach this point.
16117 auto IsTruncNode = [](SDValue V) {
16118 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16119 return false;
16120 SDValue VL = V.getOperand(2);
16121 auto *C = dyn_cast<ConstantSDNode>(VL);
16122 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16123 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16124 (isa<RegisterSDNode>(VL) &&
16125 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16126 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16127 IsVLMAXForVMSET;
16128 };
16129
16130 SDValue Op = N->getOperand(0);
16131
16132 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16133 // to distinguish such pattern.
16134 while (IsTruncNode(Op)) {
16135 if (!Op.hasOneUse())
16136 return SDValue();
16137 Op = Op.getOperand(0);
16138 }
16139
16140 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16141 SDValue N0 = Op.getOperand(0);
16142 SDValue N1 = Op.getOperand(1);
16143 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16144 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16145 SDValue N00 = N0.getOperand(0);
16146 SDValue N10 = N1.getOperand(0);
16147 if (N00.getValueType().isVector() &&
16148 N00.getValueType() == N10.getValueType() &&
16149 N->getValueType(0) == N10.getValueType()) {
16150 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16151 SDValue SMin = DAG.getNode(
16152 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16153 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16154 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16155 }
16156 }
16157 }
16158 break;
16159 }
16160 case ISD::TRUNCATE:
16161 return performTRUNCATECombine(N, DAG, Subtarget);
16162 case ISD::SELECT:
16163 return performSELECTCombine(N, DAG, Subtarget);
16164 case RISCVISD::CZERO_EQZ:
16165 case RISCVISD::CZERO_NEZ:
16166 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
16167 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
16168 if (N->getOperand(1).getOpcode() == ISD::XOR &&
16169 isOneConstant(N->getOperand(1).getOperand(1))) {
16170 SDValue Cond = N->getOperand(1).getOperand(0);
16171 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
16172 if (DAG.MaskedValueIsZero(Cond, Mask)) {
16173 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16174 ? RISCVISD::CZERO_NEZ
16175 : RISCVISD::CZERO_EQZ;
16176 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
16177 N->getOperand(0), Cond);
16178 }
16179 }
16180 return SDValue();
16181
16182 case RISCVISD::SELECT_CC: {
16183 // Transform
16184 SDValue LHS = N->getOperand(0);
16185 SDValue RHS = N->getOperand(1);
16186 SDValue CC = N->getOperand(2);
16187 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16188 SDValue TrueV = N->getOperand(3);
16189 SDValue FalseV = N->getOperand(4);
16190 SDLoc DL(N);
16191 EVT VT = N->getValueType(0);
16192
16193 // If the True and False values are the same, we don't need a select_cc.
16194 if (TrueV == FalseV)
16195 return TrueV;
16196
16197 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16198 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
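// For example (illustrative, XLEN=64): (select (x < 0), 3, 7) becomes
// ((x >> 63) & (3 - 7)) + 7, which evaluates to 3 when x is negative and 7
// otherwise.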
16199 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16200 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16201 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16202 if (CCVal == ISD::CondCode::SETGE)
16203 std::swap(TrueV, FalseV);
16204
16205 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16206 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16207 // Only handle simm12; constants outside this range would need to be
16208 // materialised into a register anyway.
16209 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16210 isInt<12>(TrueSImm - FalseSImm)) {
16211 SDValue SRA =
16212 DAG.getNode(ISD::SRA, DL, VT, LHS,
16213 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16214 SDValue AND =
16215 DAG.getNode(ISD::AND, DL, VT, SRA,
16216 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16217 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16218 }
16219
16220 if (CCVal == ISD::CondCode::SETGE)
16221 std::swap(TrueV, FalseV);
16222 }
16223
16224 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16225 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16226 {LHS, RHS, CC, TrueV, FalseV});
16227
16228 if (!Subtarget.hasConditionalMoveFusion()) {
16229 // (select c, -1, y) -> -c | y
16230 if (isAllOnesConstant(TrueV)) {
16231 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16232 SDValue Neg = DAG.getNegative(C, DL, VT);
16233 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16234 }
16235 // (select c, y, -1) -> -!c | y
16236 if (isAllOnesConstant(FalseV)) {
16237 SDValue C =
16238 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16239 SDValue Neg = DAG.getNegative(C, DL, VT);
16240 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16241 }
16242
16243 // (select c, 0, y) -> -!c & y
16244 if (isNullConstant(TrueV)) {
16245 SDValue C =
16246 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16247 SDValue Neg = DAG.getNegative(C, DL, VT);
16248 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16249 }
16250 // (select c, y, 0) -> -c & y
16251 if (isNullConstant(FalseV)) {
16252 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16253 SDValue Neg = DAG.getNegative(C, DL, VT);
16254 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16255 }
16256 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16257 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16258 if (((isOneConstant(FalseV) && LHS == TrueV &&
16259 CCVal == ISD::CondCode::SETNE) ||
16260 (isOneConstant(TrueV) && LHS == FalseV &&
16261 CCVal == ISD::CondCode::SETEQ)) &&
16262 isNullConstant(RHS)) {
16263 // freeze it to be safe.
16264 LHS = DAG.getFreeze(LHS);
16265 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16266 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16267 }
16268 }
16269
16270 // If both true/false are an xor with 1, pull through the select.
16271 // This can occur after op legalization if both operands are setccs that
16272 // require an xor to invert.
16273 // FIXME: Generalize to other binary ops with identical operand?
16274 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16275 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16276 isOneConstant(TrueV.getOperand(1)) &&
16277 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16278 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16279 TrueV.getOperand(0), FalseV.getOperand(0));
16280 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16281 }
16282
16283 return SDValue();
16284 }
16285 case RISCVISD::BR_CC: {
16286 SDValue LHS = N->getOperand(1);
16287 SDValue RHS = N->getOperand(2);
16288 SDValue CC = N->getOperand(3);
16289 SDLoc DL(N);
16290
16291 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16292 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16293 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16294
16295 return SDValue();
16296 }
16297 case ISD::BITREVERSE:
16298 return performBITREVERSECombine(N, DAG, Subtarget);
16299 case ISD::FP_TO_SINT:
16300 case ISD::FP_TO_UINT:
16301 return performFP_TO_INTCombine(N, DCI, Subtarget);
16302 case ISD::FP_TO_SINT_SAT:
16303 case ISD::FP_TO_UINT_SAT:
16304 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16305 case ISD::FCOPYSIGN: {
16306 EVT VT = N->getValueType(0);
16307 if (!VT.isVector())
16308 break;
16309 // There is a form of VFSGNJ which injects the negated sign of its second
16310 // operand. Try and bubble any FNEG up after the extend/round to produce
16311 // this optimized pattern. Avoid modifying cases where the FP_ROUND is a
16312 // truncating round (TRUNC=1).
16313 SDValue In2 = N->getOperand(1);
16314 // Avoid cases where the extend/round has multiple uses, as duplicating
16315 // those is typically more expensive than removing a fneg.
16316 if (!In2.hasOneUse())
16317 break;
16318 if (In2.getOpcode() != ISD::FP_EXTEND &&
16319 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16320 break;
16321 In2 = In2.getOperand(0);
16322 if (In2.getOpcode() != ISD::FNEG)
16323 break;
16324 SDLoc DL(N);
16325 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16326 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16327 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16328 }
16329 case ISD::MGATHER: {
16330 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16331 const EVT VT = N->getValueType(0);
16332 SDValue Index = MGN->getIndex();
16333 SDValue ScaleOp = MGN->getScale();
16334 ISD::MemIndexType IndexType = MGN->getIndexType();
16335 assert(!MGN->isIndexScaled() &&
16336 "Scaled gather/scatter should not be formed");
16337
16338 SDLoc DL(N);
16339 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16340 return DAG.getMaskedGather(
16341 N->getVTList(), MGN->getMemoryVT(), DL,
16342 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16343 MGN->getBasePtr(), Index, ScaleOp},
16344 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16345
16346 if (narrowIndex(Index, IndexType, DAG))
16347 return DAG.getMaskedGather(
16348 N->getVTList(), MGN->getMemoryVT(), DL,
16349 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16350 MGN->getBasePtr(), Index, ScaleOp},
16351 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16352
16353 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16354 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16355 // The sequence will be XLenVT, not the type of Index. Tell
16356 // isSimpleVIDSequence this so we avoid overflow.
16357 if (std::optional<VIDSequence> SimpleVID =
16358 isSimpleVIDSequence(Index, Subtarget.getXLen());
16359 SimpleVID && SimpleVID->StepDenominator == 1) {
16360 const int64_t StepNumerator = SimpleVID->StepNumerator;
16361 const int64_t Addend = SimpleVID->Addend;
16362
16363 // Note: We don't need to check alignment here since (by assumption
16364 // from the existence of the gather), our offsets must be sufficiently
16365 // aligned.
16366
16367 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16368 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16369 assert(IndexType == ISD::UNSIGNED_SCALED);
16370 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16371 DAG.getConstant(Addend, DL, PtrVT));
16372
16373 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16374 SDValue IntID =
16375 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16376 XLenVT);
16377 SDValue Ops[] =
16378 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16379 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16380 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16381 Ops, VT, MGN->getMemOperand());
16382 }
16383 }
16384
16385 SmallVector<int> ShuffleMask;
16386 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16387 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16388 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16389 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16390 MGN->getMask(), DAG.getUNDEF(VT),
16391 MGN->getMemoryVT(), MGN->getMemOperand(),
16392 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16393 SDValue Shuffle =
16394 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16395 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16396 }
16397
16398 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16399 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16400 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16401 SmallVector<SDValue> NewIndices;
16402 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16403 NewIndices.push_back(Index.getOperand(i));
16404 EVT IndexVT = Index.getValueType()
16405 .getHalfNumVectorElementsVT(*DAG.getContext());
16406 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16407
16408 unsigned ElementSize = VT.getScalarStoreSize();
16409 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16410 auto EltCnt = VT.getVectorElementCount();
16411 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16412 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16413 EltCnt.divideCoefficientBy(2));
16414 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16415 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16416 EltCnt.divideCoefficientBy(2));
16417 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16418
16419 SDValue Gather =
16420 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16421 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16422 Index, ScaleOp},
16423 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16424 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16425 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16426 }
16427 break;
16428 }
16429 case ISD::MSCATTER:{
16430 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16431 SDValue Index = MSN->getIndex();
16432 SDValue ScaleOp = MSN->getScale();
16433 ISD::MemIndexType IndexType = MSN->getIndexType();
16434 assert(!MSN->isIndexScaled() &&
16435 "Scaled gather/scatter should not be formed");
16436
16437 SDLoc DL(N);
16438 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16439 return DAG.getMaskedScatter(
16440 N->getVTList(), MSN->getMemoryVT(), DL,
16441 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16442 Index, ScaleOp},
16443 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16444
16445 if (narrowIndex(Index, IndexType, DAG))
16446 return DAG.getMaskedScatter(
16447 N->getVTList(), MSN->getMemoryVT(), DL,
16448 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16449 Index, ScaleOp},
16450 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16451
16452 EVT VT = MSN->getValue()->getValueType(0);
16453 SmallVector<int> ShuffleMask;
16454 if (!MSN->isTruncatingStore() &&
16455 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16456 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16457 DAG.getUNDEF(VT), ShuffleMask);
16458 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16459 DAG.getUNDEF(XLenVT), MSN->getMask(),
16460 MSN->getMemoryVT(), MSN->getMemOperand(),
16461 ISD::UNINDEXED, false);
16462 }
16463 break;
16464 }
16465 case ISD::VP_GATHER: {
16466 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16467 SDValue Index = VPGN->getIndex();
16468 SDValue ScaleOp = VPGN->getScale();
16469 ISD::MemIndexType IndexType = VPGN->getIndexType();
16470 assert(!VPGN->isIndexScaled() &&
16471 "Scaled gather/scatter should not be formed");
16472
16473 SDLoc DL(N);
16474 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16475 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16476 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16477 ScaleOp, VPGN->getMask(),
16478 VPGN->getVectorLength()},
16479 VPGN->getMemOperand(), IndexType);
16480
16481 if (narrowIndex(Index, IndexType, DAG))
16482 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16483 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16484 ScaleOp, VPGN->getMask(),
16485 VPGN->getVectorLength()},
16486 VPGN->getMemOperand(), IndexType);
16487
16488 break;
16489 }
16490 case ISD::VP_SCATTER: {
16491 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16492 SDValue Index = VPSN->getIndex();
16493 SDValue ScaleOp = VPSN->getScale();
16494 ISD::MemIndexType IndexType = VPSN->getIndexType();
16495 assert(!VPSN->isIndexScaled() &&
16496 "Scaled gather/scatter should not be formed");
16497
16498 SDLoc DL(N);
16499 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16500 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16501 {VPSN->getChain(), VPSN->getValue(),
16502 VPSN->getBasePtr(), Index, ScaleOp,
16503 VPSN->getMask(), VPSN->getVectorLength()},
16504 VPSN->getMemOperand(), IndexType);
16505
16506 if (narrowIndex(Index, IndexType, DAG))
16507 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16508 {VPSN->getChain(), VPSN->getValue(),
16509 VPSN->getBasePtr(), Index, ScaleOp,
16510 VPSN->getMask(), VPSN->getVectorLength()},
16511 VPSN->getMemOperand(), IndexType);
16512 break;
16513 }
16514 case RISCVISD::SHL_VL:
16515 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16516 return V;
16517 [[fallthrough]];
16518 case RISCVISD::SRA_VL:
16519 case RISCVISD::SRL_VL: {
16520 SDValue ShAmt = N->getOperand(1);
16521 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16522 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16523 SDLoc DL(N);
16524 SDValue VL = N->getOperand(4);
16525 EVT VT = N->getValueType(0);
16526 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16527 ShAmt.getOperand(1), VL);
16528 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16529 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16530 }
16531 break;
16532 }
16533 case ISD::SRA:
16534 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16535 return V;
16536 [[fallthrough]];
16537 case ISD::SRL:
16538 case ISD::SHL: {
16539 if (N->getOpcode() == ISD::SHL) {
16540 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16541 return V;
16542 }
16543 SDValue ShAmt = N->getOperand(1);
16544 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16545 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16546 SDLoc DL(N);
16547 EVT VT = N->getValueType(0);
16548 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16549 ShAmt.getOperand(1),
16550 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16551 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16552 }
16553 break;
16554 }
16555 case RISCVISD::ADD_VL:
16556 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16557 return V;
16558 return combineToVWMACC(N, DAG, Subtarget);
16559 case RISCVISD::VWADD_W_VL:
16560 case RISCVISD::VWADDU_W_VL:
16561 case RISCVISD::VWSUB_W_VL:
16562 case RISCVISD::VWSUBU_W_VL:
16563 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16564 case RISCVISD::SUB_VL:
16565 case RISCVISD::MUL_VL:
16566 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16567 case RISCVISD::VFMADD_VL:
16568 case RISCVISD::VFNMADD_VL:
16569 case RISCVISD::VFMSUB_VL:
16570 case RISCVISD::VFNMSUB_VL:
16571 case RISCVISD::STRICT_VFMADD_VL:
16572 case RISCVISD::STRICT_VFNMADD_VL:
16573 case RISCVISD::STRICT_VFMSUB_VL:
16574 case RISCVISD::STRICT_VFNMSUB_VL:
16575 return performVFMADD_VLCombine(N, DAG, Subtarget);
16576 case RISCVISD::FADD_VL:
16577 case RISCVISD::FSUB_VL:
16578 case RISCVISD::FMUL_VL:
16579 case RISCVISD::VFWADD_W_VL:
16580 case RISCVISD::VFWSUB_W_VL: {
16581 if (N->getValueType(0).isScalableVector() &&
16582 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16583 (Subtarget.hasVInstructionsF16Minimal() &&
16584 !Subtarget.hasVInstructionsF16()))
16585 return SDValue();
16586 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16587 }
16588 case ISD::LOAD:
16589 case ISD::STORE: {
16590 if (DCI.isAfterLegalizeDAG())
16591 if (SDValue V = performMemPairCombine(N, DCI))
16592 return V;
16593
16594 if (N->getOpcode() != ISD::STORE)
16595 break;
16596
16597 auto *Store = cast<StoreSDNode>(N);
16598 SDValue Chain = Store->getChain();
16599 EVT MemVT = Store->getMemoryVT();
16600 SDValue Val = Store->getValue();
16601 SDLoc DL(N);
16602
16603 bool IsScalarizable =
16604 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16605 Store->isSimple() &&
16606 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16607 isPowerOf2_64(MemVT.getSizeInBits()) &&
16608 MemVT.getSizeInBits() <= Subtarget.getXLen();
16609
16610 // If sufficiently aligned we can scalarize stores of constant vectors of
16611 // any power-of-two size up to XLen bits, provided that they aren't too
16612 // expensive to materialize.
16613 // vsetivli zero, 2, e8, m1, ta, ma
16614 // vmv.v.i v8, 4
16615 // vse64.v v8, (a0)
16616 // ->
16617 // li a1, 1028
16618 // sh a1, 0(a0)
16619 if (DCI.isBeforeLegalize() && IsScalarizable &&
16620 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16621 // Get the constant vector bits
16622 APInt NewC(Val.getValueSizeInBits(), 0);
16623 uint64_t EltSize = Val.getScalarValueSizeInBits();
16624 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16625 if (Val.getOperand(i).isUndef())
16626 continue;
16627 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16628 i * EltSize);
16629 }
16630 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16631
16632 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16633 true) <= 2 &&
16634 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16635 NewVT, *Store->getMemOperand())) {
16636 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16637 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16638 Store->getPointerInfo(), Store->getOriginalAlign(),
16639 Store->getMemOperand()->getFlags());
16640 }
16641 }
16642
16643 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16644 // vsetivli zero, 2, e16, m1, ta, ma
16645 // vle16.v v8, (a0)
16646 // vse16.v v8, (a1)
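// -> (illustrative; the widened i32 scalar copy could be emitted as)
// lw a2, 0(a0)
// sw a2, 0(a1)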
16647 if (auto *L = dyn_cast<LoadSDNode>(Val);
16648 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16649 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16650 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16651 L->getMemoryVT() == MemVT) {
16652 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16653 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16654 NewVT, *Store->getMemOperand()) &&
16655 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16656 NewVT, *L->getMemOperand())) {
16657 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16658 L->getPointerInfo(), L->getOriginalAlign(),
16659 L->getMemOperand()->getFlags());
16660 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16661 Store->getPointerInfo(), Store->getOriginalAlign(),
16662 Store->getMemOperand()->getFlags());
16663 }
16664 }
16665
16666 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16667 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16668 // any illegal types.
16669 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16670 (DCI.isAfterLegalizeDAG() &&
16671 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16672 isNullConstant(Val.getOperand(1)))) {
16673 SDValue Src = Val.getOperand(0);
16674 MVT VecVT = Src.getSimpleValueType();
16675 // VecVT should be scalable and memory VT should match the element type.
16676 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16677 MemVT == VecVT.getVectorElementType()) {
16678 SDLoc DL(N);
16679 MVT MaskVT = getMaskTypeFor(VecVT);
16680 return DAG.getStoreVP(
16681 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16682 DAG.getConstant(1, DL, MaskVT),
16683 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16684 Store->getMemOperand(), Store->getAddressingMode(),
16685 Store->isTruncatingStore(), /*IsCompress*/ false);
16686 }
16687 }
16688
16689 break;
16690 }
16691 case ISD::SPLAT_VECTOR: {
16692 EVT VT = N->getValueType(0);
16693 // Only perform this combine on legal MVT types.
16694 if (!isTypeLegal(VT))
16695 break;
16696 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16697 DAG, Subtarget))
16698 return Gather;
16699 break;
16700 }
16701 case ISD::BUILD_VECTOR:
16702 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16703 return V;
16704 break;
16705 case ISD::CONCAT_VECTORS:
16706 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16707 return V;
16708 break;
16709 case ISD::INSERT_VECTOR_ELT:
16710 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16711 return V;
16712 break;
16713 case RISCVISD::VFMV_V_F_VL: {
16714 const MVT VT = N->getSimpleValueType(0);
16715 SDValue Passthru = N->getOperand(0);
16716 SDValue Scalar = N->getOperand(1);
16717 SDValue VL = N->getOperand(2);
16718
16719 // If VL is 1, we can use vfmv.s.f.
16720 if (isOneConstant(VL))
16721 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16722 break;
16723 }
16724 case RISCVISD::VMV_V_X_VL: {
16725 const MVT VT = N->getSimpleValueType(0);
16726 SDValue Passthru = N->getOperand(0);
16727 SDValue Scalar = N->getOperand(1);
16728 SDValue VL = N->getOperand(2);
16729
16730 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16731 // scalar input.
16732 unsigned ScalarSize = Scalar.getValueSizeInBits();
16733 unsigned EltWidth = VT.getScalarSizeInBits();
16734 if (ScalarSize > EltWidth && Passthru.isUndef())
16735 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16736 return SDValue(N, 0);
16737
16738 // If VL is 1 and the scalar value won't benefit from immediate, we can
16739 // use vmv.s.x.
16740 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16741 if (isOneConstant(VL) &&
16742 (!Const || Const->isZero() ||
16743 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16744 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16745
16746 break;
16747 }
16748 case RISCVISD::VFMV_S_F_VL: {
16749 SDValue Src = N->getOperand(1);
16750 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16751 // into an undef vector.
16752 // TODO: Could use a vslide or vmv.v.v for non-undef.
16753 if (N->getOperand(0).isUndef() &&
16754 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16755 isNullConstant(Src.getOperand(1)) &&
16756 Src.getOperand(0).getValueType().isScalableVector()) {
16757 EVT VT = N->getValueType(0);
16758 EVT SrcVT = Src.getOperand(0).getValueType();
16760 // Widths match, just return the original vector.
16761 if (SrcVT == VT)
16762 return Src.getOperand(0);
16763 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16764 }
16765 [[fallthrough]];
16766 }
16767 case RISCVISD::VMV_S_X_VL: {
16768 const MVT VT = N->getSimpleValueType(0);
16769 SDValue Passthru = N->getOperand(0);
16770 SDValue Scalar = N->getOperand(1);
16771 SDValue VL = N->getOperand(2);
16772
16773 // Use M1 or smaller to avoid over constraining register allocation
16774 const MVT M1VT = getLMUL1VT(VT);
16775 if (M1VT.bitsLT(VT)) {
16776 SDValue M1Passthru =
16777 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16778 DAG.getVectorIdxConstant(0, DL));
16779 SDValue Result =
16780 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16781 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16782 DAG.getVectorIdxConstant(0, DL));
16783 return Result;
16784 }
16785
16786 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16787 // higher would involve overly constraining the register allocator for
16788 // no purpose.
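// For example (illustrative): a vmv.s.x of the constant 5 into an undef
// LMUL1 (or smaller) vector is rewritten as a splat, which can later be
// emitted as a single vmv.v.i.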
16789 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16790 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16791 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16792 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16793
16794 break;
16795 }
16796 case RISCVISD::VMV_X_S: {
16797 SDValue Vec = N->getOperand(0);
16798 MVT VecVT = N->getOperand(0).getSimpleValueType();
16799 const MVT M1VT = getLMUL1VT(VecVT);
16800 if (M1VT.bitsLT(VecVT)) {
16801 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16802 DAG.getVectorIdxConstant(0, DL));
16803 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16804 }
16805 break;
16806 }
16807 case ISD::INTRINSIC_VOID:
16808 case ISD::INTRINSIC_W_CHAIN:
16809 case ISD::INTRINSIC_WO_CHAIN: {
16810 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16811 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16812 switch (IntNo) {
16813 // By default we do not combine any intrinsic.
16814 default:
16815 return SDValue();
16816 case Intrinsic::riscv_masked_strided_load: {
16817 MVT VT = N->getSimpleValueType(0);
16818 auto *Load = cast<MemIntrinsicSDNode>(N);
16819 SDValue PassThru = N->getOperand(2);
16820 SDValue Base = N->getOperand(3);
16821 SDValue Stride = N->getOperand(4);
16822 SDValue Mask = N->getOperand(5);
16823
16824 // If the stride is equal to the element size in bytes, we can use
16825 // a masked.load.
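// For example (illustrative): a strided load of i32 elements with stride 4
// reads contiguous memory, so it is equivalent to a unit-stride masked load.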
16826 const unsigned ElementSize = VT.getScalarStoreSize();
16827 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16828 StrideC && StrideC->getZExtValue() == ElementSize)
16829 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16830 DAG.getUNDEF(XLenVT), Mask, PassThru,
16831 Load->getMemoryVT(), Load->getMemOperand(),
16832 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16833 return SDValue();
16834 }
16835 case Intrinsic::riscv_masked_strided_store: {
16836 auto *Store = cast<MemIntrinsicSDNode>(N);
16837 SDValue Value = N->getOperand(2);
16838 SDValue Base = N->getOperand(3);
16839 SDValue Stride = N->getOperand(4);
16840 SDValue Mask = N->getOperand(5);
16841
16842 // If the stride is equal to the element size in bytes, we can use
16843 // a masked.store.
16844 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16845 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16846 StrideC && StrideC->getZExtValue() == ElementSize)
16847 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16848 DAG.getUNDEF(XLenVT), Mask,
16849 Value.getValueType(), Store->getMemOperand(),
16850 ISD::UNINDEXED, false);
16851 return SDValue();
16852 }
16853 case Intrinsic::riscv_vcpop:
16854 case Intrinsic::riscv_vcpop_mask:
16855 case Intrinsic::riscv_vfirst:
16856 case Intrinsic::riscv_vfirst_mask: {
16857 SDValue VL = N->getOperand(2);
16858 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16859 IntNo == Intrinsic::riscv_vfirst_mask)
16860 VL = N->getOperand(3);
16861 if (!isNullConstant(VL))
16862 return SDValue();
16863 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16864 SDLoc DL(N);
16865 EVT VT = N->getValueType(0);
16866 if (IntNo == Intrinsic::riscv_vfirst ||
16867 IntNo == Intrinsic::riscv_vfirst_mask)
16868 return DAG.getConstant(-1, DL, VT);
16869 return DAG.getConstant(0, DL, VT);
16870 }
16871 }
16872 }
16873 case ISD::BITCAST: {
16875 SDValue N0 = N->getOperand(0);
16876 EVT VT = N->getValueType(0);
16877 EVT SrcVT = N0.getValueType();
16878 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16879 // type, widen both sides to avoid a trip through memory.
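// For example (illustrative): a bitcast of v4i1 to i4 is widened by
// concatenating with an undef v4i1 to form v8i1, bitcasting that to i8, and
// truncating the result back to i4.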
16880 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16881 VT.isScalarInteger()) {
16882 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16883 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16884 Ops[0] = N0;
16885 SDLoc DL(N);
16886 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16887 N0 = DAG.getBitcast(MVT::i8, N0);
16888 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16889 }
16890
16891 return SDValue();
16892 }
16893 }
16894
16895 return SDValue();
16896}
16897
16898 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16899 EVT XVT, unsigned KeptBits) const {
16900 // For vectors, we don't have a preference.
16901 if (XVT.isVector())
16902 return false;
16903
16904 if (XVT != MVT::i32 && XVT != MVT::i64)
16905 return false;
16906
16907 // We can use sext.w for RV64 or an srai 31 on RV32.
16908 if (KeptBits == 32 || KeptBits == 64)
16909 return true;
16910
16911 // With Zbb we can use sext.h/sext.b.
16912 return Subtarget.hasStdExtZbb() &&
16913 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16914 KeptBits == 16);
16915}
16916
16917 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16918 const SDNode *N, CombineLevel Level) const {
16919 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16920 N->getOpcode() == ISD::SRL) &&
16921 "Expected shift op");
16922
16923 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16924 // materialised in fewer instructions than `(OP _, c1)`:
16925 //
16926 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16927 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
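// For example (illustrative): for (shl (add x, 2047), 3), c1 << c2 = 16376 no
// longer fits an ADDI immediate while 2047 does, so the fold is rejected; with
// c1 = 4 the shifted constant 32 still fits and the fold is allowed.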
16928 SDValue N0 = N->getOperand(0);
16929 EVT Ty = N0.getValueType();
16930 if (Ty.isScalarInteger() &&
16931 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16932 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16933 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16934 if (C1 && C2) {
16935 const APInt &C1Int = C1->getAPIntValue();
16936 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16937
16938 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16939 // and the combine should happen, to potentially allow further combines
16940 // later.
16941 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16942 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16943 return true;
16944
16945 // We can materialise `c1` in an add immediate, so it's "free", and the
16946 // combine should be prevented.
16947 if (C1Int.getSignificantBits() <= 64 &&
16948 isLegalAddImmediate(C1Int.getSExtValue()))
16949 return false;
16950
16951 // Neither constant will fit into an immediate, so find materialisation
16952 // costs.
16953 int C1Cost =
16954 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16955 /*CompressionCost*/ true);
16956 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16957 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16958 /*CompressionCost*/ true);
16959
16960 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16961 // combine should be prevented.
16962 if (C1Cost < ShiftedC1Cost)
16963 return false;
16964 }
16965 }
16966 return true;
16967}
16968
16969 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16970 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16971 TargetLoweringOpt &TLO) const {
16972 // Delay this optimization as late as possible.
16973 if (!TLO.LegalOps)
16974 return false;
16975
16976 EVT VT = Op.getValueType();
16977 if (VT.isVector())
16978 return false;
16979
16980 unsigned Opcode = Op.getOpcode();
16981 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16982 return false;
16983
16984 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16985 if (!C)
16986 return false;
16987
16988 const APInt &Mask = C->getAPIntValue();
16989
16990 // Clear all non-demanded bits initially.
16991 APInt ShrunkMask = Mask & DemandedBits;
16992
16993 // Try to make a smaller immediate by setting undemanded bits.
16994
16995 APInt ExpandedMask = Mask | ~DemandedBits;
16996
16997 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16998 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
16999 };
17000 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17001 if (NewMask == Mask)
17002 return true;
17003 SDLoc DL(Op);
17004 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17005 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17006 Op.getOperand(0), NewC);
17007 return TLO.CombineTo(Op, NewOp);
17008 };
17009
17010 // If the shrunk mask fits in sign extended 12 bits, let the target
17011 // independent code apply it.
17012 if (ShrunkMask.isSignedIntN(12))
17013 return false;
17014
17015 // And has a few special cases for zext.
17016 if (Opcode == ISD::AND) {
17017 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17018 // otherwise use SLLI + SRLI.
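// For example (illustrative): (and X, 0x1ffff) with only the low 16 bits
// demanded can be rewritten as (and X, 0xffff).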
17019 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17020 if (IsLegalMask(NewMask))
17021 return UseMask(NewMask);
17022
17023 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17024 if (VT == MVT::i64) {
17025 APInt NewMask = APInt(64, 0xffffffff);
17026 if (IsLegalMask(NewMask))
17027 return UseMask(NewMask);
17028 }
17029 }
17030
17031 // For the remaining optimizations, we need to be able to make a negative
17032 // number through a combination of mask and undemanded bits.
17033 if (!ExpandedMask.isNegative())
17034 return false;
17035
17036 // What is the fewest number of bits we need to represent the negative number?
17037 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17038
17039 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17040 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17041 // If we can't create a simm12, we shouldn't change opaque constants.
17042 APInt NewMask = ShrunkMask;
17043 if (MinSignedBits <= 12)
17044 NewMask.setBitsFrom(11);
17045 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17046 NewMask.setBitsFrom(31);
17047 else
17048 return false;
17049
17050 // Check that our new mask is a subset of the demanded mask.
17051 assert(IsLegalMask(NewMask));
17052 return UseMask(NewMask);
17053}
17054
17055static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17056 static const uint64_t GREVMasks[] = {
17057 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17058 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17059
17060 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17061 unsigned Shift = 1 << Stage;
17062 if (ShAmt & Shift) {
17063 uint64_t Mask = GREVMasks[Stage];
17064 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17065 if (IsGORC)
17066 Res |= x;
17067 x = Res;
17068 }
17069 }
17070
17071 return x;
17072}
17073
17074 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17075 KnownBits &Known,
17076 const APInt &DemandedElts,
17077 const SelectionDAG &DAG,
17078 unsigned Depth) const {
17079 unsigned BitWidth = Known.getBitWidth();
17080 unsigned Opc = Op.getOpcode();
17081 assert((Opc >= ISD::BUILTIN_OP_END ||
17082 Opc == ISD::INTRINSIC_WO_CHAIN ||
17083 Opc == ISD::INTRINSIC_W_CHAIN ||
17084 Opc == ISD::INTRINSIC_VOID) &&
17085 "Should use MaskedValueIsZero if you don't know whether Op"
17086 " is a target node!");
17087
17088 Known.resetAll();
17089 switch (Opc) {
17090 default: break;
17091 case RISCVISD::SELECT_CC: {
17092 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17093 // If we don't know any bits, early out.
17094 if (Known.isUnknown())
17095 break;
17096 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17097
17098 // Only known if known in both the LHS and RHS.
17099 Known = Known.intersectWith(Known2);
17100 break;
17101 }
17102 case RISCVISD::CZERO_EQZ:
17103 case RISCVISD::CZERO_NEZ:
17104 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17105 // Result is either all zero or operand 0. We can propagate zeros, but not
17106 // ones.
17107 Known.One.clearAllBits();
17108 break;
17109 case RISCVISD::REMUW: {
17110 KnownBits Known2;
17111 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17112 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17113 // We only care about the lower 32 bits.
17114 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17115 // Restore the original width by sign extending.
17116 Known = Known.sext(BitWidth);
17117 break;
17118 }
17119 case RISCVISD::DIVUW: {
17120 KnownBits Known2;
17121 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17122 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17123 // We only care about the lower 32 bits.
17124 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17125 // Restore the original width by sign extending.
17126 Known = Known.sext(BitWidth);
17127 break;
17128 }
17129 case RISCVISD::SLLW: {
17130 KnownBits Known2;
17131 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17132 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17133 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17134 // Restore the original width by sign extending.
17135 Known = Known.sext(BitWidth);
17136 break;
17137 }
17138 case RISCVISD::CTZW: {
17139 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17140 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17141 unsigned LowBits = llvm::bit_width(PossibleTZ);
17142 Known.Zero.setBitsFrom(LowBits);
17143 break;
17144 }
17145 case RISCVISD::CLZW: {
17146 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17147 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17148 unsigned LowBits = llvm::bit_width(PossibleLZ);
17149 Known.Zero.setBitsFrom(LowBits);
17150 break;
17151 }
17152 case RISCVISD::BREV8:
17153 case RISCVISD::ORC_B: {
17154 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17155 // control value of 7 is equivalent to brev8 and orc.b.
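// For example (illustrative): for orc.b, a byte whose bits are all known zero
// stays known zero, while a byte with any bit known one becomes known
// all-ones.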
17156 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17157 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17158 // To compute zeros, we need to invert the value and invert it back after.
17159 Known.Zero =
17160 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17161 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17162 break;
17163 }
17164 case RISCVISD::READ_VLENB: {
17165 // We can use the minimum and maximum VLEN values to bound VLENB. We
17166 // know VLEN must be a power of two.
17167 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17168 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17169 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17170 Known.Zero.setLowBits(Log2_32(MinVLenB));
17171 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17172 if (MaxVLenB == MinVLenB)
17173 Known.One.setBit(Log2_32(MinVLenB));
17174 break;
17175 }
17176 case RISCVISD::FCLASS: {
17177 // fclass will only set one of the low 10 bits.
17178 Known.Zero.setBitsFrom(10);
17179 break;
17180 }
17181  case ISD::INTRINSIC_W_CHAIN:
17182  case ISD::INTRINSIC_WO_CHAIN: {
17183    unsigned IntNo =
17184 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17185 switch (IntNo) {
17186 default:
17187 // We can't do anything for most intrinsics.
17188 break;
17189 case Intrinsic::riscv_vsetvli:
17190 case Intrinsic::riscv_vsetvlimax: {
17191 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17192 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17193 RISCVII::VLMUL VLMUL =
17194 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17195 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17196 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17197 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17198 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17199
17200      // The result of vsetvli must not be larger than AVL.
17201 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17202 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17203
17204 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17205 if (BitWidth > KnownZeroFirstBit)
17206 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17207 break;
17208 }
17209 }
17210 break;
17211 }
17212 }
17213}
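// Worked example for the vsetvli/vsetvlimax handling above (illustrative
// numbers only, chosen as assumptions for this sketch): with SEW = 64,
// LMUL = 1 and a maximum VLEN of 512, MaxVL = 512 / 64 = 8, so
// KnownZeroFirstBit = Log2_32(8) + 1 = 4 and every result bit from position 4
// upward is known to be zero.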
17214
17215unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17216    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17217 unsigned Depth) const {
17218 switch (Op.getOpcode()) {
17219 default:
17220 break;
17221 case RISCVISD::SELECT_CC: {
17222 unsigned Tmp =
17223 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17224 if (Tmp == 1) return 1; // Early out.
17225 unsigned Tmp2 =
17226 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17227 return std::min(Tmp, Tmp2);
17228 }
17229  case RISCVISD::CZERO_EQZ:
17230  case RISCVISD::CZERO_NEZ:
17231    // Output is either all zero or operand 0. We can propagate sign bit count
17232 // from operand 0.
17233 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17234 case RISCVISD::ABSW: {
17235 // We expand this at isel to negw+max. The result will have 33 sign bits
17236 // if the input has at least 33 sign bits.
17237 unsigned Tmp =
17238 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17239 if (Tmp < 33) return 1;
17240 return 33;
17241 }
17242 case RISCVISD::SLLW:
17243 case RISCVISD::SRAW:
17244 case RISCVISD::SRLW:
17245 case RISCVISD::DIVW:
17246 case RISCVISD::DIVUW:
17247 case RISCVISD::REMUW:
17248 case RISCVISD::ROLW:
17249 case RISCVISD::RORW:
17254 // TODO: As the result is sign-extended, this is conservatively correct. A
17255 // more precise answer could be calculated for SRAW depending on known
17256 // bits in the shift amount.
17257 return 33;
17258 case RISCVISD::VMV_X_S: {
17259 // The number of sign bits of the scalar result is computed by obtaining the
17260 // element type of the input vector operand, subtracting its width from the
17261 // XLEN, and then adding one (sign bit within the element type). If the
17262 // element type is wider than XLen, the least-significant XLEN bits are
17263 // taken.
17264 unsigned XLen = Subtarget.getXLen();
17265 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17266 if (EltBits <= XLen)
17267 return XLen - EltBits + 1;
17268 break;
17269 }
17270  case ISD::INTRINSIC_W_CHAIN: {
17271    unsigned IntNo = Op.getConstantOperandVal(1);
17272 switch (IntNo) {
17273 default:
17274 break;
17275 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17276 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17277 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17278 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17279 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17280 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17281 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17282 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17283 case Intrinsic::riscv_masked_cmpxchg_i64:
17284 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17285 // narrow atomic operation. These are implemented using atomic
17286 // operations at the minimum supported atomicrmw/cmpxchg width whose
17287      // result is then sign extended to XLEN. With the A extension, the minimum
17288      // width is 32 on both RV32 and RV64.
17289 assert(Subtarget.getXLen() == 64);
17291 assert(Subtarget.hasStdExtA());
17292 return 33;
17293 }
17294 break;
17295 }
17296 }
17297
17298 return 1;
17299}
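// Worked example for the VMV_X_S case above (assumed types, illustration
// only): extracting the first element of a vector with i16 elements on RV64
// gives XLen - EltBits + 1 = 64 - 16 + 1 = 49 known sign bits, because the
// scalar result is sign-extended from the 16-bit element to XLEN.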
17300
17301bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17302    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17303 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17304
17305 // TODO: Add more target nodes.
17306 switch (Op.getOpcode()) {
17307  case RISCVISD::SELECT_CC:
17308    // Integer select_cc cannot create poison.
17309 // TODO: What are the FP poison semantics?
17310 // TODO: This instruction blocks poison from the unselected operand, can
17311 // we do anything with that?
17312 return !Op.getValueType().isInteger();
17313 }
17314  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17315      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17316}
17317
17318const Constant *
17319RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17320  assert(Ld && "Unexpected null LoadSDNode");
17321 if (!ISD::isNormalLoad(Ld))
17322 return nullptr;
17323
17324 SDValue Ptr = Ld->getBasePtr();
17325
17326 // Only constant pools with no offset are supported.
17327 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17328 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17329 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17330 CNode->getOffset() != 0)
17331 return nullptr;
17332
17333 return CNode;
17334 };
17335
17336 // Simple case, LLA.
17337 if (Ptr.getOpcode() == RISCVISD::LLA) {
17338 auto *CNode = GetSupportedConstantPool(Ptr);
17339 if (!CNode || CNode->getTargetFlags() != 0)
17340 return nullptr;
17341
17342 return CNode->getConstVal();
17343 }
17344
17345 // Look for a HI and ADD_LO pair.
17346 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17347 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17348 return nullptr;
17349
17350 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17351 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17352
17353 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17354 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17355 return nullptr;
17356
17357 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17358 return nullptr;
17359
17360 return CNodeLo->getConstVal();
17361}
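// Rough sketch of the two address patterns recognised above (assumed assembly
// and a hypothetical constant-pool symbol, for illustration only):
//
//   lla  a0, .LCPI0_0              ; RISCVISD::LLA of the constant pool
//
//   lui  a0, %hi(.LCPI0_0)         ; RISCVISD::HI
//   addi a0, a0, %lo(.LCPI0_0)     ; RISCVISD::ADD_LO
//
// In both forms the load's base pointer resolves to the same constant pool
// entry with no offset, so its initializer can be returned directly.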
17362
17363static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17364                                                    MachineBasicBlock *BB) {
17365 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17366
17367 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17368 // Should the count have wrapped while it was being read, we need to try
17369 // again.
17370 // For example:
17371 // ```
17372 // read:
17373 // csrrs x3, counterh # load high word of counter
17374 // csrrs x2, counter # load low word of counter
17375 // csrrs x4, counterh # load high word of counter
17376 // bne x3, x4, read # check if high word reads match, otherwise try again
17377 // ```
17378
17379 MachineFunction &MF = *BB->getParent();
17380 const BasicBlock *LLVMBB = BB->getBasicBlock();
17382
17383 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17384 MF.insert(It, LoopMBB);
17385
17386 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17387 MF.insert(It, DoneMBB);
17388
17389 // Transfer the remainder of BB and its successor edges to DoneMBB.
17390 DoneMBB->splice(DoneMBB->begin(), BB,
17391 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17392  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17393
17394 BB->addSuccessor(LoopMBB);
17395
17397 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17398 Register LoReg = MI.getOperand(0).getReg();
17399 Register HiReg = MI.getOperand(1).getReg();
17400 int64_t LoCounter = MI.getOperand(2).getImm();
17401 int64_t HiCounter = MI.getOperand(3).getImm();
17402 DebugLoc DL = MI.getDebugLoc();
17403
17405 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17406 .addImm(HiCounter)
17407 .addReg(RISCV::X0);
17408 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17409 .addImm(LoCounter)
17410 .addReg(RISCV::X0);
17411 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17412 .addImm(HiCounter)
17413 .addReg(RISCV::X0);
17414
17415 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17416 .addReg(HiReg)
17417 .addReg(ReadAgainReg)
17418 .addMBB(LoopMBB);
17419
17420 LoopMBB->addSuccessor(LoopMBB);
17421 LoopMBB->addSuccessor(DoneMBB);
17422
17423 MI.eraseFromParent();
17424
17425 return DoneMBB;
17426}
17427
17428static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17429                                             MachineBasicBlock *BB,
17430                                             const RISCVSubtarget &Subtarget) {
17431 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17432
17433 MachineFunction &MF = *BB->getParent();
17434 DebugLoc DL = MI.getDebugLoc();
17437 Register LoReg = MI.getOperand(0).getReg();
17438 Register HiReg = MI.getOperand(1).getReg();
17439 Register SrcReg = MI.getOperand(2).getReg();
17440
17441 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17442 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17443
17444 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17445 RI, Register());
17447 MachineMemOperand *MMOLo =
17451 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17452 .addFrameIndex(FI)
17453 .addImm(0)
17454 .addMemOperand(MMOLo);
17455 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17456 .addFrameIndex(FI)
17457 .addImm(4)
17458 .addMemOperand(MMOHi);
17459 MI.eraseFromParent(); // The pseudo instruction is gone now.
17460 return BB;
17461}
17462
17463static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17464                                                 MachineBasicBlock *BB,
17465                                                 const RISCVSubtarget &Subtarget) {
17466 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17467 "Unexpected instruction");
17468
17469 MachineFunction &MF = *BB->getParent();
17470 DebugLoc DL = MI.getDebugLoc();
17473 Register DstReg = MI.getOperand(0).getReg();
17474 Register LoReg = MI.getOperand(1).getReg();
17475 Register HiReg = MI.getOperand(2).getReg();
17476
17477 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17478 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17479
17481 MachineMemOperand *MMOLo =
17485 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17486 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17487 .addFrameIndex(FI)
17488 .addImm(0)
17489 .addMemOperand(MMOLo);
17490 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17491 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17492 .addFrameIndex(FI)
17493 .addImm(4)
17494 .addMemOperand(MMOHi);
17495 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17496 MI.eraseFromParent(); // The pseudo instruction is gone now.
17497 return BB;
17498}
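// Rough sketch of what the two pseudos above expand to on RV32 with D
// (assumed registers and stack slot, illustration only):
//
//   SplitF64Pseudo:     fsd fa0, 0(slot)   ; spill the f64 source
//                       lw  a0, 0(slot)    ; low 32 bits
//                       lw  a1, 4(slot)    ; high 32 bits
//
//   BuildPairF64Pseudo: sw  a0, 0(slot)    ; low 32 bits
//                       sw  a1, 4(slot)    ; high 32 bits
//                       fld fa0, 0(slot)   ; reload as a single f64
//
// Both use the shared MoveF64 frame index created above.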
17499
17500static bool isSelectPseudo(MachineInstr &MI) {
17501  switch (MI.getOpcode()) {
17502 default:
17503 return false;
17504 case RISCV::Select_GPR_Using_CC_GPR:
17505 case RISCV::Select_FPR16_Using_CC_GPR:
17506 case RISCV::Select_FPR16INX_Using_CC_GPR:
17507 case RISCV::Select_FPR32_Using_CC_GPR:
17508 case RISCV::Select_FPR32INX_Using_CC_GPR:
17509 case RISCV::Select_FPR64_Using_CC_GPR:
17510 case RISCV::Select_FPR64INX_Using_CC_GPR:
17511 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17512 return true;
17513 }
17514}
17515
17516static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17517                                        unsigned RelOpcode, unsigned EqOpcode,
17518 const RISCVSubtarget &Subtarget) {
17519 DebugLoc DL = MI.getDebugLoc();
17520 Register DstReg = MI.getOperand(0).getReg();
17521 Register Src1Reg = MI.getOperand(1).getReg();
17522 Register Src2Reg = MI.getOperand(2).getReg();
17524 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17526
17527 // Save the current FFLAGS.
17528 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17529
17530 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17531 .addReg(Src1Reg)
17532 .addReg(Src2Reg);
17535
17536 // Restore the FFLAGS.
17537 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17538 .addReg(SavedFFlags, RegState::Kill);
17539
17540  // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
17541 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17542 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17543 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17546
17547 // Erase the pseudoinstruction.
17548 MI.eraseFromParent();
17549 return BB;
17550}
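// Rough sketch of the expansion for a quiet compare such as PseudoQuietFLT_S
// (assumed registers, illustration only):
//
//   frflags t0            ; save FFLAGS
//   flt.s   a0, fa0, fa1  ; FLT raises the invalid flag for any NaN operand
//   fsflags t0            ; restore FFLAGS, discarding that flag
//   feq.s   x0, fa0, fa1  ; dummy FEQ: raises invalid only for signaling NaNs
//
// The net effect is a relational compare that only signals on signaling NaNs.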
17551
17552static MachineBasicBlock *
17553EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17554                          MachineBasicBlock *ThisMBB,
17555 const RISCVSubtarget &Subtarget) {
17556  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17557 // Without this, custom-inserter would have generated:
17558 //
17559 // A
17560 // | \
17561 // | B
17562 // | /
17563 // C
17564 // | \
17565 // | D
17566 // | /
17567 // E
17568 //
17569 // A: X = ...; Y = ...
17570 // B: empty
17571 // C: Z = PHI [X, A], [Y, B]
17572 // D: empty
17573 // E: PHI [X, C], [Z, D]
17574 //
17575 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17576 //
17577 // A
17578 // | \
17579 // | C
17580 // | /|
17581 // |/ |
17582 // | |
17583 // | D
17584 // | /
17585 // E
17586 //
17587 // A: X = ...; Y = ...
17588 // D: empty
17589 // E: PHI [X, A], [X, C], [Y, D]
17590
17591 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17592 const DebugLoc &DL = First.getDebugLoc();
17593 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17594 MachineFunction *F = ThisMBB->getParent();
17595 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17596 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17597 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17598 MachineFunction::iterator It = ++ThisMBB->getIterator();
17599 F->insert(It, FirstMBB);
17600 F->insert(It, SecondMBB);
17601 F->insert(It, SinkMBB);
17602
17603 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17604 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17606 ThisMBB->end());
17607 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17608
17609 // Fallthrough block for ThisMBB.
17610 ThisMBB->addSuccessor(FirstMBB);
17611 // Fallthrough block for FirstMBB.
17612 FirstMBB->addSuccessor(SecondMBB);
17613 ThisMBB->addSuccessor(SinkMBB);
17614 FirstMBB->addSuccessor(SinkMBB);
17615 // This is fallthrough.
17616 SecondMBB->addSuccessor(SinkMBB);
17617
17618 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17619 Register FLHS = First.getOperand(1).getReg();
17620 Register FRHS = First.getOperand(2).getReg();
17621 // Insert appropriate branch.
17622 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17623 .addReg(FLHS)
17624 .addReg(FRHS)
17625 .addMBB(SinkMBB);
17626
17627 Register SLHS = Second.getOperand(1).getReg();
17628 Register SRHS = Second.getOperand(2).getReg();
17629 Register Op1Reg4 = First.getOperand(4).getReg();
17630 Register Op1Reg5 = First.getOperand(5).getReg();
17631
17632 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17633 // Insert appropriate branch.
17634 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17635 .addReg(SLHS)
17636 .addReg(SRHS)
17637 .addMBB(SinkMBB);
17638
17639 Register DestReg = Second.getOperand(0).getReg();
17640 Register Op2Reg4 = Second.getOperand(4).getReg();
17641 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17642 .addReg(Op2Reg4)
17643 .addMBB(ThisMBB)
17644 .addReg(Op1Reg4)
17645 .addMBB(FirstMBB)
17646 .addReg(Op1Reg5)
17647 .addMBB(SecondMBB);
17648
17649 // Now remove the Select_FPRX_s.
17650 First.eraseFromParent();
17651 Second.eraseFromParent();
17652 return SinkMBB;
17653}
17654
17655static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17656                                           MachineBasicBlock *BB,
17657                                           const RISCVSubtarget &Subtarget) {
17658 // To "insert" Select_* instructions, we actually have to insert the triangle
17659 // control-flow pattern. The incoming instructions know the destination vreg
17660 // to set, the condition code register to branch on, the true/false values to
17661 // select between, and the condcode to use to select the appropriate branch.
17662 //
17663 // We produce the following control flow:
17664 // HeadMBB
17665 // | \
17666 // | IfFalseMBB
17667 // | /
17668 // TailMBB
17669 //
17670 // When we find a sequence of selects we attempt to optimize their emission
17671 // by sharing the control flow. Currently we only handle cases where we have
17672 // multiple selects with the exact same condition (same LHS, RHS and CC).
17673 // The selects may be interleaved with other instructions if the other
17674 // instructions meet some requirements we deem safe:
17675  // - They are debug instructions; or otherwise,
17676  // - They are not pseudo instructions, do not have side-effects, do not
17677  //   access memory, and their inputs do not depend on the results of the
17678  //   select pseudo-instructions.
17679 // The TrueV/FalseV operands of the selects cannot depend on the result of
17680 // previous selects in the sequence.
17681 // These conditions could be further relaxed. See the X86 target for a
17682 // related approach and more information.
17683 //
17684 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17685 // is checked here and handled by a separate function -
17686 // EmitLoweredCascadedSelect.
17687 Register LHS = MI.getOperand(1).getReg();
17688 Register RHS = MI.getOperand(2).getReg();
17689 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17690
17691 SmallVector<MachineInstr *, 4> SelectDebugValues;
17692 SmallSet<Register, 4> SelectDests;
17693 SelectDests.insert(MI.getOperand(0).getReg());
17694
17695 MachineInstr *LastSelectPseudo = &MI;
17696 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17697 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17698 Next->getOpcode() == MI.getOpcode() &&
17699 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17700 Next->getOperand(5).isKill()) {
17701 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17702 }
17703
17704 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17705 SequenceMBBI != E; ++SequenceMBBI) {
17706 if (SequenceMBBI->isDebugInstr())
17707 continue;
17708 if (isSelectPseudo(*SequenceMBBI)) {
17709 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17710 SequenceMBBI->getOperand(2).getReg() != RHS ||
17711 SequenceMBBI->getOperand(3).getImm() != CC ||
17712 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17713 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17714 break;
17715 LastSelectPseudo = &*SequenceMBBI;
17716 SequenceMBBI->collectDebugValues(SelectDebugValues);
17717 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17718 continue;
17719 }
17720 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17721 SequenceMBBI->mayLoadOrStore() ||
17722 SequenceMBBI->usesCustomInsertionHook())
17723 break;
17724 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17725 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17726 }))
17727 break;
17728 }
17729
17730 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17731 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17732 DebugLoc DL = MI.getDebugLoc();
17734
17735 MachineBasicBlock *HeadMBB = BB;
17736 MachineFunction *F = BB->getParent();
17737 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17738 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17739
17740 F->insert(I, IfFalseMBB);
17741 F->insert(I, TailMBB);
17742
17743 // Transfer debug instructions associated with the selects to TailMBB.
17744 for (MachineInstr *DebugInstr : SelectDebugValues) {
17745 TailMBB->push_back(DebugInstr->removeFromParent());
17746 }
17747
17748 // Move all instructions after the sequence to TailMBB.
17749 TailMBB->splice(TailMBB->end(), HeadMBB,
17750 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17751 // Update machine-CFG edges by transferring all successors of the current
17752 // block to the new block which will contain the Phi nodes for the selects.
17753 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17754 // Set the successors for HeadMBB.
17755 HeadMBB->addSuccessor(IfFalseMBB);
17756 HeadMBB->addSuccessor(TailMBB);
17757
17758 // Insert appropriate branch.
17759 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17760 .addReg(LHS)
17761 .addReg(RHS)
17762 .addMBB(TailMBB);
17763
17764 // IfFalseMBB just falls through to TailMBB.
17765 IfFalseMBB->addSuccessor(TailMBB);
17766
17767 // Create PHIs for all of the select pseudo-instructions.
17768 auto SelectMBBI = MI.getIterator();
17769 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17770 auto InsertionPoint = TailMBB->begin();
17771 while (SelectMBBI != SelectEnd) {
17772 auto Next = std::next(SelectMBBI);
17773 if (isSelectPseudo(*SelectMBBI)) {
17774 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17775 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17776 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17777 .addReg(SelectMBBI->getOperand(4).getReg())
17778 .addMBB(HeadMBB)
17779 .addReg(SelectMBBI->getOperand(5).getReg())
17780 .addMBB(IfFalseMBB);
17781 SelectMBBI->eraseFromParent();
17782 }
17783 SelectMBBI = Next;
17784 }
17785
17786 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17787 return TailMBB;
17788}
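// Illustrative example of the select grouping described above (assumed
// virtual registers): two select pseudos with identical LHS/RHS/CC
//
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
//
// share a single conditional branch and are rewritten as PHIs:
//
//   HeadMBB:    bCC %x, %y, TailMBB
//   IfFalseMBB: (fallthrough)
//   TailMBB:    %a = PHI [%t1, HeadMBB], [%f1, IfFalseMBB]
//               %b = PHI [%t2, HeadMBB], [%f2, IfFalseMBB]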
17789
17790// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
17791static const RISCV::RISCVMaskedPseudoInfo *
17792lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17793  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17794      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17795 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17796 const RISCV::RISCVMaskedPseudoInfo *Masked =
17797 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17798 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17799 return Masked;
17800}
17801
17802static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17803                                                    MachineBasicBlock *BB,
17804                                                    unsigned CVTXOpc) {
17805 DebugLoc DL = MI.getDebugLoc();
17806
17808
17810 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17811
17812 // Save the old value of FFLAGS.
17813 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17814
17815 assert(MI.getNumOperands() == 7);
17816
17817 // Emit a VFCVT_X_F
17818 const TargetRegisterInfo *TRI =
17820 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17821 Register Tmp = MRI.createVirtualRegister(RC);
17822 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17823 .add(MI.getOperand(1))
17824 .add(MI.getOperand(2))
17825 .add(MI.getOperand(3))
17826 .add(MachineOperand::CreateImm(7)) // frm = DYN
17827 .add(MI.getOperand(4))
17828 .add(MI.getOperand(5))
17829 .add(MI.getOperand(6))
17830 .add(MachineOperand::CreateReg(RISCV::FRM,
17831 /*IsDef*/ false,
17832 /*IsImp*/ true));
17833
17834 // Emit a VFCVT_F_X
17835 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
17836 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
17837 // There is no E8 variant for VFCVT_F_X.
17838 assert(Log2SEW >= 4);
17839 unsigned CVTFOpc =
17840 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17841 ->MaskedPseudo;
17842
17843 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17844 .add(MI.getOperand(0))
17845 .add(MI.getOperand(1))
17846 .addReg(Tmp)
17847 .add(MI.getOperand(3))
17848 .add(MachineOperand::CreateImm(7)) // frm = DYN
17849 .add(MI.getOperand(4))
17850 .add(MI.getOperand(5))
17851 .add(MI.getOperand(6))
17852 .add(MachineOperand::CreateReg(RISCV::FRM,
17853 /*IsDef*/ false,
17854 /*IsImp*/ true));
17855
17856 // Restore FFLAGS.
17857 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17858 .addReg(SavedFFLAGS, RegState::Kill);
17859
17860 // Erase the pseudoinstruction.
17861 MI.eraseFromParent();
17862 return BB;
17863}
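// Rough sketch of the sequence emitted above for a masked
// PseudoVFROUND_NOEXCEPT (assumed vector registers, illustration only):
//
//   frflags t0                  ; save FFLAGS
//   vfcvt.x.f.v v9, v8, v0.t    ; CVTXOpc, frm = DYN
//   vfcvt.f.x.v v8, v9, v0.t    ; masked VFCVT_F_X pseudo, frm = DYN
//   fsflags t0                  ; restore FFLAGS, hence "NOEXCEPT"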
17864
17865static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17866                                     const RISCVSubtarget &Subtarget) {
17867 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17868 const TargetRegisterClass *RC;
17869 switch (MI.getOpcode()) {
17870 default:
17871 llvm_unreachable("Unexpected opcode");
17872 case RISCV::PseudoFROUND_H:
17873 CmpOpc = RISCV::FLT_H;
17874 F2IOpc = RISCV::FCVT_W_H;
17875 I2FOpc = RISCV::FCVT_H_W;
17876 FSGNJOpc = RISCV::FSGNJ_H;
17877 FSGNJXOpc = RISCV::FSGNJX_H;
17878 RC = &RISCV::FPR16RegClass;
17879 break;
17880 case RISCV::PseudoFROUND_H_INX:
17881 CmpOpc = RISCV::FLT_H_INX;
17882 F2IOpc = RISCV::FCVT_W_H_INX;
17883 I2FOpc = RISCV::FCVT_H_W_INX;
17884 FSGNJOpc = RISCV::FSGNJ_H_INX;
17885 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17886 RC = &RISCV::GPRF16RegClass;
17887 break;
17888 case RISCV::PseudoFROUND_S:
17889 CmpOpc = RISCV::FLT_S;
17890 F2IOpc = RISCV::FCVT_W_S;
17891 I2FOpc = RISCV::FCVT_S_W;
17892 FSGNJOpc = RISCV::FSGNJ_S;
17893 FSGNJXOpc = RISCV::FSGNJX_S;
17894 RC = &RISCV::FPR32RegClass;
17895 break;
17896 case RISCV::PseudoFROUND_S_INX:
17897 CmpOpc = RISCV::FLT_S_INX;
17898 F2IOpc = RISCV::FCVT_W_S_INX;
17899 I2FOpc = RISCV::FCVT_S_W_INX;
17900 FSGNJOpc = RISCV::FSGNJ_S_INX;
17901 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17902 RC = &RISCV::GPRF32RegClass;
17903 break;
17904 case RISCV::PseudoFROUND_D:
17905 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17906 CmpOpc = RISCV::FLT_D;
17907 F2IOpc = RISCV::FCVT_L_D;
17908 I2FOpc = RISCV::FCVT_D_L;
17909 FSGNJOpc = RISCV::FSGNJ_D;
17910 FSGNJXOpc = RISCV::FSGNJX_D;
17911 RC = &RISCV::FPR64RegClass;
17912 break;
17913 case RISCV::PseudoFROUND_D_INX:
17914 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17915 CmpOpc = RISCV::FLT_D_INX;
17916 F2IOpc = RISCV::FCVT_L_D_INX;
17917 I2FOpc = RISCV::FCVT_D_L_INX;
17918 FSGNJOpc = RISCV::FSGNJ_D_INX;
17919 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17920 RC = &RISCV::GPRRegClass;
17921 break;
17922 }
17923
17924 const BasicBlock *BB = MBB->getBasicBlock();
17925 DebugLoc DL = MI.getDebugLoc();
17927
17929 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17930 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17931
17932 F->insert(I, CvtMBB);
17933 F->insert(I, DoneMBB);
17934 // Move all instructions after the sequence to DoneMBB.
17935 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17936 MBB->end());
17937 // Update machine-CFG edges by transferring all successors of the current
17938 // block to the new block which will contain the Phi nodes for the selects.
17939  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17940  // Set the successors for MBB.
17941 MBB->addSuccessor(CvtMBB);
17942 MBB->addSuccessor(DoneMBB);
17943
17944 Register DstReg = MI.getOperand(0).getReg();
17945 Register SrcReg = MI.getOperand(1).getReg();
17946 Register MaxReg = MI.getOperand(2).getReg();
17947 int64_t FRM = MI.getOperand(3).getImm();
17948
17949 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17951
17952 Register FabsReg = MRI.createVirtualRegister(RC);
17953 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17954
17955 // Compare the FP value to the max value.
17956 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17957 auto MIB =
17958 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17961
17962 // Insert branch.
17963 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17964 .addReg(CmpReg)
17965 .addReg(RISCV::X0)
17966 .addMBB(DoneMBB);
17967
17968 CvtMBB->addSuccessor(DoneMBB);
17969
17970 // Convert to integer.
17971 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17972 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17975
17976 // Convert back to FP.
17977 Register I2FReg = MRI.createVirtualRegister(RC);
17978 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17981
17982 // Restore the sign bit.
17983 Register CvtReg = MRI.createVirtualRegister(RC);
17984 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17985
17986 // Merge the results.
17987 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17988 .addReg(SrcReg)
17989 .addMBB(MBB)
17990 .addReg(CvtReg)
17991 .addMBB(CvtMBB);
17992
17993 MI.eraseFromParent();
17994 return DoneMBB;
17995}
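// Rough sketch of the control flow built above for PseudoFROUND_S (assumed
// registers; the exact MaxReg constant comes from the pseudo's operands, so
// the 2^23 threshold below is only an assumption typical for f32):
//
//   MBB:     fsgnjx.s ft0, fa0, fa0    ; ft0 = |x|
//            flt.s    t0, ft0, fa1     ; |x| < 2^23 ?
//            beq      t0, x0, DoneMBB  ; already integral or NaN: keep x
//   CvtMBB:  fcvt.w.s t1, fa0, rm      ; round to integer with the given FRM
//            fcvt.s.w ft1, t1, rm      ; convert back to FP
//            fsgnj.s  ft2, ft1, fa0    ; restore the original sign
//   DoneMBB: %res = PHI [fa0, MBB], [ft2, CvtMBB]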
17996
17997MachineBasicBlock *
17998RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17999                                                 MachineBasicBlock *BB) const {
18000 switch (MI.getOpcode()) {
18001 default:
18002 llvm_unreachable("Unexpected instr type to insert");
18003 case RISCV::ReadCounterWide:
18004 assert(!Subtarget.is64Bit() &&
18005 "ReadCounterWide is only to be used on riscv32");
18006 return emitReadCounterWidePseudo(MI, BB);
18007 case RISCV::Select_GPR_Using_CC_GPR:
18008 case RISCV::Select_FPR16_Using_CC_GPR:
18009 case RISCV::Select_FPR16INX_Using_CC_GPR:
18010 case RISCV::Select_FPR32_Using_CC_GPR:
18011 case RISCV::Select_FPR32INX_Using_CC_GPR:
18012 case RISCV::Select_FPR64_Using_CC_GPR:
18013 case RISCV::Select_FPR64INX_Using_CC_GPR:
18014 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18015 return emitSelectPseudo(MI, BB, Subtarget);
18016 case RISCV::BuildPairF64Pseudo:
18017 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18018 case RISCV::SplitF64Pseudo:
18019 return emitSplitF64Pseudo(MI, BB, Subtarget);
18020 case RISCV::PseudoQuietFLE_H:
18021 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18022 case RISCV::PseudoQuietFLE_H_INX:
18023 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18024 case RISCV::PseudoQuietFLT_H:
18025 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18026 case RISCV::PseudoQuietFLT_H_INX:
18027 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18028 case RISCV::PseudoQuietFLE_S:
18029 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18030 case RISCV::PseudoQuietFLE_S_INX:
18031 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18032 case RISCV::PseudoQuietFLT_S:
18033 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18034 case RISCV::PseudoQuietFLT_S_INX:
18035 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18036 case RISCV::PseudoQuietFLE_D:
18037 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18038 case RISCV::PseudoQuietFLE_D_INX:
18039 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18040 case RISCV::PseudoQuietFLE_D_IN32X:
18041 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18042 Subtarget);
18043 case RISCV::PseudoQuietFLT_D:
18044 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18045 case RISCV::PseudoQuietFLT_D_INX:
18046 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18047 case RISCV::PseudoQuietFLT_D_IN32X:
18048 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18049 Subtarget);
18050
18051 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18052 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18053 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18054 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18055 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18056 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18057 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18058 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18059 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18060 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18061 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18062 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18063 case RISCV::PseudoFROUND_H:
18064 case RISCV::PseudoFROUND_H_INX:
18065 case RISCV::PseudoFROUND_S:
18066 case RISCV::PseudoFROUND_S_INX:
18067 case RISCV::PseudoFROUND_D:
18068 case RISCV::PseudoFROUND_D_INX:
18069 case RISCV::PseudoFROUND_D_IN32X:
18070 return emitFROUND(MI, BB, Subtarget);
18071 case TargetOpcode::STATEPOINT:
18072 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18073    // while the jal call instruction (to which the statepoint is lowered at
18074    // the end) has an implicit def. This def is early-clobber as it is set at
18075    // the moment of the call, earlier than any use is read.
18076 // Add this implicit dead def here as a workaround.
18077 MI.addOperand(*MI.getMF(),
18078                  MachineOperand::CreateReg(
18079                    RISCV::X1, /*isDef*/ true,
18080 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18081 /*isUndef*/ false, /*isEarlyClobber*/ true));
18082 [[fallthrough]];
18083 case TargetOpcode::STACKMAP:
18084 case TargetOpcode::PATCHPOINT:
18085 if (!Subtarget.is64Bit())
18086 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18087 "supported on 64-bit targets");
18088 return emitPatchPoint(MI, BB);
18089 }
18090}
18091
18092void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18093                                                        SDNode *Node) const {
18094 // Add FRM dependency to any instructions with dynamic rounding mode.
18095 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18096 if (Idx < 0) {
18097 // Vector pseudos have FRM index indicated by TSFlags.
18098 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18099 if (Idx < 0)
18100 return;
18101 }
18102 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18103 return;
18104 // If the instruction already reads FRM, don't add another read.
18105 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18106 return;
18107 MI.addOperand(
18108 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18109}
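// Illustrative example (assumed MIR, not from this file): an instruction like
//
//   %2:fpr32 = FADD_S %0:fpr32, %1:fpr32, 7    ; frm operand == DYN
//
// gets "implicit $frm" appended by the hook above, so later passes see its
// dependence on the dynamic rounding-mode CSR; instructions with a static
// rounding mode are left untouched.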
18110
18111// Calling Convention Implementation.
18112// The expectations for frontend ABI lowering vary from target to target.
18113// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18114// details, but this is a longer term goal. For now, we simply try to keep the
18115// role of the frontend as simple and well-defined as possible. The rules can
18116// be summarised as:
18117// * Never split up large scalar arguments. We handle them here.
18118// * If a hardfloat calling convention is being used, and the struct may be
18119// passed in a pair of registers (fp+fp, int+fp), and both registers are
18120// available, then pass as two separate arguments. If either the GPRs or FPRs
18121// are exhausted, then pass according to the rule below.
18122// * If a struct could never be passed in registers or directly in a stack
18123// slot (as it is larger than 2*XLEN and the floating point rules don't
18124// apply), then pass it using a pointer with the byval attribute.
18125// * If a struct is less than 2*XLEN, then coerce to either a two-element
18126// word-sized array or a 2*XLEN scalar (depending on alignment).
18127// * The frontend can determine whether a struct is returned by reference or
18128// not based on its size and fields. If it will be returned by reference, the
18129// frontend must modify the prototype so a pointer with the sret annotation is
18130// passed as the first argument. This is not necessary for large scalar
18131// returns.
18132// * Struct return values and varargs should be coerced to structs containing
18133// register-size fields in the same situations they would be for fixed
18134// arguments.
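// Illustrative examples of the rules above (assumed C types, not taken from
// this file), for a hard-float ILP32D target:
//   struct S1 { int32_t i; double d; };  // int+fp pair: passed as two
//                                        // separate arguments while a GPR
//                                        // and an FPR remain available.
//   struct S2 { int64_t a[4]; };         // larger than 2*XLEN and not
//                                        // covered by the FP rules: passed
//                                        // as a pointer with byval.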
18135
18136static const MCPhysReg ArgFPR16s[] = {
18137 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18138 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18139};
18140static const MCPhysReg ArgFPR32s[] = {
18141 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18142 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18143};
18144static const MCPhysReg ArgFPR64s[] = {
18145 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18146 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18147};
18148// This is an interim calling convention and it may be changed in the future.
18149static const MCPhysReg ArgVRs[] = {
18150 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18151 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18152 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18153static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18154 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18155 RISCV::V20M2, RISCV::V22M2};
18156static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18157 RISCV::V20M4};
18158static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18159
18161  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18162  // the ILP32E/LP64E ABIs.
18163 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18164 RISCV::X13, RISCV::X14, RISCV::X15,
18165 RISCV::X16, RISCV::X17};
18166  // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18167 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18168 RISCV::X13, RISCV::X14, RISCV::X15};
18169
18170 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18171 return ArrayRef(ArgEGPRs);
18172
18173 return ArrayRef(ArgIGPRs);
18174}
18175
18176static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18177  // The GPRs used for passing arguments in FastCC. X5 and X6 might be used
18178  // for the save-restore libcalls, so we don't use them.
18179 static const MCPhysReg FastCCIGPRs[] = {
18180 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18181 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18182 RISCV::X29, RISCV::X30, RISCV::X31};
18183
18184  // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18185 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18186 RISCV::X13, RISCV::X14, RISCV::X15,
18187 RISCV::X7};
18188
18189 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18190 return ArrayRef(FastCCEGPRs);
18191
18192 return ArrayRef(FastCCIGPRs);
18193}
18194
18195// Pass a 2*XLEN argument that has been split into two XLEN values through
18196// registers or the stack as necessary.
18197static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18198 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18199 MVT ValVT2, MVT LocVT2,
18200 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18201 unsigned XLenInBytes = XLen / 8;
18202 const RISCVSubtarget &STI =
18205
18206 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18207 // At least one half can be passed via register.
18208 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18209 VA1.getLocVT(), CCValAssign::Full));
18210 } else {
18211 // Both halves must be passed on the stack, with proper alignment.
18212 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18213 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18214 Align StackAlign(XLenInBytes);
18215 if (!EABI || XLen != 32)
18216 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18217 State.addLoc(
18219 State.AllocateStack(XLenInBytes, StackAlign),
18220 VA1.getLocVT(), CCValAssign::Full));
18222 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18223 LocVT2, CCValAssign::Full));
18224 return false;
18225 }
18226
18227 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18228 // The second half can also be passed via register.
18229 State.addLoc(
18230 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18231 } else {
18232 // The second half is passed via the stack, without additional alignment.
18234 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18235 LocVT2, CCValAssign::Full));
18236 }
18237
18238 return false;
18239}
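// Worked example for CC_RISCVAssign2XLen above (RV32, i64 argument split into
// two i32 halves; register names are assumed):
//   - Two GPRs free (a0, a1):   each half is assigned a register.
//   - Only a1 free:             low half in a1, high half in a 4-byte stack
//                               slot.
//   - No GPRs free:             both halves on the stack; the first slot uses
//                               the argument's original alignment (8 bytes
//                               here) except under the ILP32E rule, where it
//                               stays 4-byte aligned.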
18240
18241// Implements the RISC-V calling convention. Returns true upon failure.
18242bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18243 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18244 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18245 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18246 RVVArgDispatcher &RVVDispatcher) {
18247 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18248 assert(XLen == 32 || XLen == 64);
18249 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18250
18251 // Static chain parameter must not be passed in normal argument registers,
18252 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18253 if (ArgFlags.isNest()) {
18254 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18255 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18256 return false;
18257 }
18258 }
18259
18260  // Any return value split into more than two values can't be returned
18261 // directly. Vectors are returned via the available vector registers.
18262 if (!LocVT.isVector() && IsRet && ValNo > 1)
18263 return true;
18264
18265  // Use GPRs for F16/F32 if targeting one of the soft-float ABIs, if passing a
18266  // variadic argument, or if no F16/F32 argument registers are available.
18267 bool UseGPRForF16_F32 = true;
18268  // Use GPRs for F64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing
18269  // a variadic argument, or if no F64 argument registers are available.
18270 bool UseGPRForF64 = true;
18271
18272 switch (ABI) {
18273 default:
18274 llvm_unreachable("Unexpected ABI");
18275  case RISCVABI::ABI_ILP32:
18276  case RISCVABI::ABI_ILP32E:
18277  case RISCVABI::ABI_LP64:
18278  case RISCVABI::ABI_LP64E:
18279    break;
18280  case RISCVABI::ABI_ILP32F:
18281  case RISCVABI::ABI_LP64F:
18282    UseGPRForF16_F32 = !IsFixed;
18283    break;
18284  case RISCVABI::ABI_ILP32D:
18285  case RISCVABI::ABI_LP64D:
18286    UseGPRForF16_F32 = !IsFixed;
18287    UseGPRForF64 = !IsFixed;
18288    break;
18289 }
18290
18291 // FPR16, FPR32, and FPR64 alias each other.
18292 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18293 UseGPRForF16_F32 = true;
18294 UseGPRForF64 = true;
18295 }
18296
18297 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18298 // similar local variables rather than directly checking against the target
18299 // ABI.
18300
18301 if (UseGPRForF16_F32 &&
18302 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18303 LocVT = XLenVT;
18304 LocInfo = CCValAssign::BCvt;
18305 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18306 LocVT = MVT::i64;
18307 LocInfo = CCValAssign::BCvt;
18308 }
18309
18311
18312 // If this is a variadic argument, the RISC-V calling convention requires
18313 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18314 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18315 // be used regardless of whether the original argument was split during
18316 // legalisation or not. The argument will not be passed by registers if the
18317 // original type is larger than 2*XLEN, so the register alignment rule does
18318 // not apply.
18319 // TODO: To be compatible with GCC's behaviors, we don't align registers
18320 // currently if we are using ILP32E calling convention. This behavior may be
18321 // changed when RV32E/ILP32E is ratified.
18322 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18323 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18324 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18325 ABI != RISCVABI::ABI_ILP32E) {
18326 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18327 // Skip 'odd' register if necessary.
18328 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18329 State.AllocateReg(ArgGPRs);
18330 }
18331
18332 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18333 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18334 State.getPendingArgFlags();
18335
18336 assert(PendingLocs.size() == PendingArgFlags.size() &&
18337 "PendingLocs and PendingArgFlags out of sync");
18338
18339 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18340 // registers are exhausted.
18341 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18342 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18343 // Depending on available argument GPRS, f64 may be passed in a pair of
18344 // GPRs, split between a GPR and the stack, or passed completely on the
18345 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18346 // cases.
18347 Register Reg = State.AllocateReg(ArgGPRs);
18348 if (!Reg) {
18349 unsigned StackOffset = State.AllocateStack(8, Align(8));
18350 State.addLoc(
18351 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18352 return false;
18353 }
18354 LocVT = MVT::i32;
18355 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18356 Register HiReg = State.AllocateReg(ArgGPRs);
18357 if (HiReg) {
18358 State.addLoc(
18359 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18360 } else {
18361 unsigned StackOffset = State.AllocateStack(4, Align(4));
18362 State.addLoc(
18363 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18364 }
18365 return false;
18366 }
18367
18368 // Fixed-length vectors are located in the corresponding scalable-vector
18369 // container types.
18370 if (ValVT.isFixedLengthVector())
18371 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18372
18373 // Split arguments might be passed indirectly, so keep track of the pending
18374 // values. Split vectors are passed via a mix of registers and indirectly, so
18375 // treat them as we would any other argument.
18376 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18377 LocVT = XLenVT;
18378 LocInfo = CCValAssign::Indirect;
18379 PendingLocs.push_back(
18380 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18381 PendingArgFlags.push_back(ArgFlags);
18382 if (!ArgFlags.isSplitEnd()) {
18383 return false;
18384 }
18385 }
18386
18387 // If the split argument only had two elements, it should be passed directly
18388 // in registers or on the stack.
18389 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18390 PendingLocs.size() <= 2) {
18391 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18392 // Apply the normal calling convention rules to the first half of the
18393 // split argument.
18394 CCValAssign VA = PendingLocs[0];
18395 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18396 PendingLocs.clear();
18397 PendingArgFlags.clear();
18398 return CC_RISCVAssign2XLen(
18399 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18400 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18401 }
18402
18403 // Allocate to a register if possible, or else a stack slot.
18404 Register Reg;
18405 unsigned StoreSizeBytes = XLen / 8;
18406 Align StackAlign = Align(XLen / 8);
18407
18408 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18409 Reg = State.AllocateReg(ArgFPR16s);
18410 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18411 Reg = State.AllocateReg(ArgFPR32s);
18412 else if (ValVT == MVT::f64 && !UseGPRForF64)
18413 Reg = State.AllocateReg(ArgFPR64s);
18414 else if (ValVT.isVector()) {
18415 Reg = RVVDispatcher.getNextPhysReg();
18416 if (!Reg) {
18417 // For return values, the vector must be passed fully via registers or
18418 // via the stack.
18419 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18420 // but we're using all of them.
18421 if (IsRet)
18422 return true;
18423 // Try using a GPR to pass the address
18424 if ((Reg = State.AllocateReg(ArgGPRs))) {
18425 LocVT = XLenVT;
18426 LocInfo = CCValAssign::Indirect;
18427 } else if (ValVT.isScalableVector()) {
18428 LocVT = XLenVT;
18429 LocInfo = CCValAssign::Indirect;
18430 } else {
18431 // Pass fixed-length vectors on the stack.
18432 LocVT = ValVT;
18433 StoreSizeBytes = ValVT.getStoreSize();
18434 // Align vectors to their element sizes, being careful for vXi1
18435 // vectors.
18436 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18437 }
18438 }
18439 } else {
18440 Reg = State.AllocateReg(ArgGPRs);
18441 }
18442
18443 unsigned StackOffset =
18444 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18445
18446 // If we reach this point and PendingLocs is non-empty, we must be at the
18447 // end of a split argument that must be passed indirectly.
18448 if (!PendingLocs.empty()) {
18449 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18450 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18451
18452 for (auto &It : PendingLocs) {
18453 if (Reg)
18454 It.convertToReg(Reg);
18455 else
18456 It.convertToMem(StackOffset);
18457 State.addLoc(It);
18458 }
18459 PendingLocs.clear();
18460 PendingArgFlags.clear();
18461 return false;
18462 }
18463
18464 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18465 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18466 "Expected an XLenVT or vector types at this stage");
18467
18468 if (Reg) {
18469 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18470 return false;
18471 }
18472
18473 // When a scalar floating-point value is passed on the stack, no
18474 // bit-conversion is needed.
18475 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18476 assert(!ValVT.isVector());
18477 LocVT = ValVT;
18478 LocInfo = CCValAssign::Full;
18479 }
18480 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18481 return false;
18482}
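// Worked example of the variadic register-alignment rule handled above
// (assumed call, illustration only): a variadic double on RV32 with a0-a2
// already taken has 8-byte alignment, so the "skip odd register" step burns
// a3 and the two halves land in the aligned pair a4/a5, as the ABI requires
// for 2*XLEN-aligned variadic arguments.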
18483
18484template <typename ArgTy>
18485static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18486 for (const auto &ArgIdx : enumerate(Args)) {
18487 MVT ArgVT = ArgIdx.value().VT;
18488 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18489 return ArgIdx.index();
18490 }
18491 return std::nullopt;
18492}
18493
18494void RISCVTargetLowering::analyzeInputArgs(
18495 MachineFunction &MF, CCState &CCInfo,
18496 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18497 RISCVCCAssignFn Fn) const {
18498 unsigned NumArgs = Ins.size();
18500
18501 RVVArgDispatcher Dispatcher;
18502 if (IsRet) {
18503 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18504 } else {
18505 SmallVector<Type *, 4> TypeList;
18506 for (const Argument &Arg : MF.getFunction().args())
18507 TypeList.push_back(Arg.getType());
18508 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18509 }
18510
18511 for (unsigned i = 0; i != NumArgs; ++i) {
18512 MVT ArgVT = Ins[i].VT;
18513 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18514
18515 Type *ArgTy = nullptr;
18516 if (IsRet)
18517 ArgTy = FType->getReturnType();
18518 else if (Ins[i].isOrigArg())
18519 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18520
18522 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18523 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18524 Dispatcher)) {
18525 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18526 << ArgVT << '\n');
18527 llvm_unreachable(nullptr);
18528 }
18529 }
18530}
18531
18532void RISCVTargetLowering::analyzeOutputArgs(
18533 MachineFunction &MF, CCState &CCInfo,
18534 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18535 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18536 unsigned NumArgs = Outs.size();
18537
18538 SmallVector<Type *, 4> TypeList;
18539 if (IsRet)
18540 TypeList.push_back(MF.getFunction().getReturnType());
18541 else if (CLI)
18542 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18543 TypeList.push_back(Arg.Ty);
18544 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18545
18546 for (unsigned i = 0; i != NumArgs; i++) {
18547 MVT ArgVT = Outs[i].VT;
18548 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18549 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18550
18552 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18553 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18554 Dispatcher)) {
18555 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18556 << ArgVT << "\n");
18557 llvm_unreachable(nullptr);
18558 }
18559 }
18560}
18561
18562// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18563// values.
18564static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18565                                   const CCValAssign &VA, const SDLoc &DL,
18566 const RISCVSubtarget &Subtarget) {
18567 switch (VA.getLocInfo()) {
18568 default:
18569 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18570 case CCValAssign::Full:
18571    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18572      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18573 break;
18574 case CCValAssign::BCvt:
18575 if (VA.getLocVT().isInteger() &&
18576 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18577 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18578 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18579 if (RV64LegalI32) {
18580 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18581 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18582 } else {
18583 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18584 }
18585 } else {
18586 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18587 }
18588 break;
18589 }
18590 return Val;
18591}
18592
18593// The caller is responsible for loading the full value if the argument is
18594// passed with CCValAssign::Indirect.
18596 const CCValAssign &VA, const SDLoc &DL,
18597 const ISD::InputArg &In,
18598 const RISCVTargetLowering &TLI) {
18601 EVT LocVT = VA.getLocVT();
18602 SDValue Val;
18603 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18604 Register VReg = RegInfo.createVirtualRegister(RC);
18605 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18606 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18607
18608 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18609 if (In.isOrigArg()) {
18610 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18611 if (OrigArg->getType()->isIntegerTy()) {
18612 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18613 // An input zero extended from i31 can also be considered sign extended.
18614 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18615 (BitWidth < 32 && In.Flags.isZExt())) {
18617 RVFI->addSExt32Register(VReg);
18618 }
18619 }
18620 }
18621
18622  if (VA.getLocInfo() == CCValAssign::Indirect)
18623    return Val;
18624
18625 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18626}
18627
18628static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18629                                   const CCValAssign &VA, const SDLoc &DL,
18630 const RISCVSubtarget &Subtarget) {
18631 EVT LocVT = VA.getLocVT();
18632
18633 switch (VA.getLocInfo()) {
18634 default:
18635 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18636 case CCValAssign::Full:
18637 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18638 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18639 break;
18640 case CCValAssign::BCvt:
18641 if (LocVT.isInteger() &&
18642 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18643 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18644 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18645 if (RV64LegalI32) {
18646 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18647 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18648 } else {
18649 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18650 }
18651 } else {
18652 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18653 }
18654 break;
18655 }
18656 return Val;
18657}
18658
18659// The caller is responsible for loading the full value if the argument is
18660// passed with CCValAssign::Indirect.
18662 const CCValAssign &VA, const SDLoc &DL) {
18664 MachineFrameInfo &MFI = MF.getFrameInfo();
18665 EVT LocVT = VA.getLocVT();
18666 EVT ValVT = VA.getValVT();
18668 if (ValVT.isScalableVector()) {
18669 // When the value is a scalable vector, we save the pointer which points to
18670 // the scalable vector value in the stack. The ValVT will be the pointer
18671 // type, instead of the scalable vector type.
18672 ValVT = LocVT;
18673 }
18674 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18675 /*IsImmutable=*/true);
18676 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18677 SDValue Val;
18678
18679 ISD::LoadExtType ExtType;
18680 switch (VA.getLocInfo()) {
18681 default:
18682 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18683 case CCValAssign::Full:
18684  case CCValAssign::Indirect:
18685  case CCValAssign::BCvt:
18686 ExtType = ISD::NON_EXTLOAD;
18687 break;
18688 }
18689 Val = DAG.getExtLoad(
18690 ExtType, DL, LocVT, Chain, FIN,
18692 return Val;
18693}
18694
18696 const CCValAssign &VA,
18697 const CCValAssign &HiVA,
18698 const SDLoc &DL) {
18699 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18700 "Unexpected VA");
18702 MachineFrameInfo &MFI = MF.getFrameInfo();
18704
18705 assert(VA.isRegLoc() && "Expected register VA assignment");
18706
18707 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18708 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18709 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18710 SDValue Hi;
18711 if (HiVA.isMemLoc()) {
18712 // Second half of f64 is passed on the stack.
18713 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18714 /*IsImmutable=*/true);
18715 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18716 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18718 } else {
18719 // Second half of f64 is passed in another GPR.
18720 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18721 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18722 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18723 }
18724 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18725}
18726
18727 // FastCC has less than 1% performance improvement for some particular
18728 // benchmarks. But theoretically, it may have benefits in some cases.
18730 unsigned ValNo, MVT ValVT, MVT LocVT,
18731 CCValAssign::LocInfo LocInfo,
18732 ISD::ArgFlagsTy ArgFlags, CCState &State,
18733 bool IsFixed, bool IsRet, Type *OrigTy,
18734 const RISCVTargetLowering &TLI,
18735 RVVArgDispatcher &RVVDispatcher) {
18736 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18737 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18738 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18739 return false;
18740 }
18741 }
18742
18743 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18744
18745 if (LocVT == MVT::f16 &&
18746 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18747 static const MCPhysReg FPR16List[] = {
18748 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18749 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18750 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18751 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18752 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18753 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18754 return false;
18755 }
18756 }
18757
18758 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18759 static const MCPhysReg FPR32List[] = {
18760 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18761 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18762 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18763 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18764 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18765 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18766 return false;
18767 }
18768 }
18769
18770 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18771 static const MCPhysReg FPR64List[] = {
18772 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18773 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18774 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18775 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18776 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18777 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18778 return false;
18779 }
18780 }
18781
18782 // Check if there is an available GPR before hitting the stack.
18783 if ((LocVT == MVT::f16 &&
18784 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18785 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18786 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18787 Subtarget.hasStdExtZdinx())) {
18788 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18789 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18790 return false;
18791 }
18792 }
18793
18794 if (LocVT == MVT::f16) {
18795 unsigned Offset2 = State.AllocateStack(2, Align(2));
18796 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18797 return false;
18798 }
18799
18800 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18801 unsigned Offset4 = State.AllocateStack(4, Align(4));
18802 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18803 return false;
18804 }
18805
18806 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18807 unsigned Offset5 = State.AllocateStack(8, Align(8));
18808 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18809 return false;
18810 }
18811
18812 if (LocVT.isVector()) {
18813 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18814 if (AllocatedVReg) {
18815 // Fixed-length vectors are located in the corresponding scalable-vector
18816 // container types.
18817 if (ValVT.isFixedLengthVector())
18818 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18819 State.addLoc(
18820 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
18821 } else {
18822 // Try and pass the address via a "fast" GPR.
18823 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18824 LocInfo = CCValAssign::Indirect;
18825 LocVT = TLI.getSubtarget().getXLenVT();
18826 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18827 } else if (ValVT.isFixedLengthVector()) {
18828 auto StackAlign =
18830 unsigned StackOffset =
18831 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18832 State.addLoc(
18833 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18834 } else {
18835 // Can't pass scalable vectors on the stack.
18836 return true;
18837 }
18838 }
18839
18840 return false;
18841 }
18842
18843 return true; // CC didn't match.
18844}
18845
18846bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18847 CCValAssign::LocInfo LocInfo,
18848 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18849 if (ArgFlags.isNest()) {
18850 report_fatal_error(
18851 "Attribute 'nest' is not supported in GHC calling convention");
18852 }
18853
18854 static const MCPhysReg GPRList[] = {
18855 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18856 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18857
18858 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18859 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18860 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18861 if (unsigned Reg = State.AllocateReg(GPRList)) {
18862 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18863 return false;
18864 }
18865 }
18866
18867 const RISCVSubtarget &Subtarget =
18869
18870 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18871 // Pass in STG registers: F1, ..., F6
18872 // fs0 ... fs5
18873 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18874 RISCV::F18_F, RISCV::F19_F,
18875 RISCV::F20_F, RISCV::F21_F};
18876 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18877 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18878 return false;
18879 }
18880 }
18881
18882 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18883 // Pass in STG registers: D1, ..., D6
18884 // fs6 ... fs11
18885 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18886 RISCV::F24_D, RISCV::F25_D,
18887 RISCV::F26_D, RISCV::F27_D};
18888 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18889 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18890 return false;
18891 }
18892 }
18893
18894 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18895 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18896 Subtarget.is64Bit())) {
18897 if (unsigned Reg = State.AllocateReg(GPRList)) {
18898 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18899 return false;
18900 }
18901 }
18902
18903 report_fatal_error("No registers left in GHC calling convention");
18904 return true;
18905}
18906
18907// Transform physical registers into virtual registers.
18909 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18910 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18911 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18912
18914
18915 switch (CallConv) {
18916 default:
18917 report_fatal_error("Unsupported calling convention");
18918 case CallingConv::C:
18919 case CallingConv::Fast:
18921 case CallingConv::GRAAL:
18923 break;
18924 case CallingConv::GHC:
18925 if (Subtarget.hasStdExtE())
18926 report_fatal_error("GHC calling convention is not supported on RVE!");
18927 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18928 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18929 "(Zdinx/D) instruction set extensions");
18930 }
18931
18932 const Function &Func = MF.getFunction();
18933 if (Func.hasFnAttribute("interrupt")) {
18934 if (!Func.arg_empty())
18935 report_fatal_error(
18936 "Functions with the interrupt attribute cannot have arguments!");
18937
18938 StringRef Kind =
18939 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18940
18941 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18943 "Function interrupt attribute argument not supported!");
18944 }
18945
18946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18947 MVT XLenVT = Subtarget.getXLenVT();
18948 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18949 // Used with varargs to accumulate store chains.
18950 std::vector<SDValue> OutChains;
18951
18952 // Assign locations to all of the incoming arguments.
18954 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18955
18956 if (CallConv == CallingConv::GHC)
18958 else
18959 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18961 : RISCV::CC_RISCV);
18962
18963 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18964 CCValAssign &VA = ArgLocs[i];
18965 SDValue ArgValue;
18966 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18967 // case.
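// For example, with the ilp32 ABI on an RV32 target that has the D
// extension, an f64 argument gets an i32 LocVT: its low half arrives in a
// GPR and its high half in the next GPR or on the stack, and the halves are
// recombined below with RISCVISD::BuildPairF64.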
18968 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18969 assert(VA.needsCustom());
18970 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
18971 } else if (VA.isRegLoc())
18972 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
18973 else
18974 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18975
18976 if (VA.getLocInfo() == CCValAssign::Indirect) {
18977 // If the original argument was split and passed by reference (e.g. i128
18978 // on RV32), we need to load all parts of it here (using the same
18979 // address). Vectors may be partly split to registers and partly to the
18980 // stack, in which case the base address is partly offset and subsequent
18981 // stores are relative to that.
18982 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
18984 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18985 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18986 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18987 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18988 CCValAssign &PartVA = ArgLocs[i + 1];
18989 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18990 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18991 if (PartVA.getValVT().isScalableVector())
18992 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18993 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
18994 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
18996 ++i;
18997 ++InsIdx;
18998 }
18999 continue;
19000 }
19001 InVals.push_back(ArgValue);
19002 }
19003
19004 if (any_of(ArgLocs,
19005 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19006 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19007
19008 if (IsVarArg) {
19009 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19010 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19011 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19012 MachineFrameInfo &MFI = MF.getFrameInfo();
19013 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19015
19016 // Size of the vararg save area. For now, the varargs save area is either
19017 // zero or large enough to hold a0-a7.
19018 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19019 int FI;
19020
19021 // If all registers are allocated, then all varargs must be passed on the
19022 // stack and we don't need to save any argregs.
19023 if (VarArgsSaveSize == 0) {
19024 int VaArgOffset = CCInfo.getStackSize();
19025 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19026 } else {
19027 int VaArgOffset = -VarArgsSaveSize;
19028 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19029
19030 // If saving an odd number of registers then create an extra stack slot to
19031 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19032 // offsets to even-numbered registers remain 2*XLEN-aligned.
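// For example, if the fixed arguments consume a0-a4 on RV64, then a5-a7
// (an odd count of three, 24 bytes) are saved; the extra 8-byte slot pads
// the save area to 32 bytes so that offsets to the even-numbered registers
// stay 2*XLEN (16-byte) aligned.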
19033 if (Idx % 2) {
19035 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19036 VarArgsSaveSize += XLenInBytes;
19037 }
19038
19039 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19040
19041 // Copy the integer registers that may have been used for passing varargs
19042 // to the vararg save area.
19043 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19044 const Register Reg = RegInfo.createVirtualRegister(RC);
19045 RegInfo.addLiveIn(ArgRegs[I], Reg);
19046 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19047 SDValue Store = DAG.getStore(
19048 Chain, DL, ArgValue, FIN,
19049 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19050 OutChains.push_back(Store);
19051 FIN =
19052 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19053 }
19054 }
19055
19056 // Record the frame index of the first variable argument,
19057 // which is needed by VASTART.
19058 RVFI->setVarArgsFrameIndex(FI);
19059 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19060 }
19061
19062 // All stores are grouped in one node to allow the matching between
19063 // the size of Ins and InVals. This only happens for vararg functions.
19064 if (!OutChains.empty()) {
19065 OutChains.push_back(Chain);
19066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19067 }
19068
19069 return Chain;
19070}
19071
19072/// isEligibleForTailCallOptimization - Check whether the call is eligible
19073/// for tail call optimization.
19074/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19075bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19076 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19077 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19078
19079 auto CalleeCC = CLI.CallConv;
19080 auto &Outs = CLI.Outs;
19081 auto &Caller = MF.getFunction();
19082 auto CallerCC = Caller.getCallingConv();
19083
19084 // Exception-handling functions need a special set of instructions to
19085 // indicate a return to the hardware. Tail-calling another function would
19086 // probably break this.
19087 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19088 // should be expanded as new function attributes are introduced.
19089 if (Caller.hasFnAttribute("interrupt"))
19090 return false;
19091
19092 // Do not tail call opt if the stack is used to pass parameters.
19093 if (CCInfo.getStackSize() != 0)
19094 return false;
19095
19096 // Do not tail call opt if any parameters need to be passed indirectly.
19097 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19098 // passed indirectly. So the address of the value will be passed in a
19099 // register, or if not available, then the address is put on the stack. In
19100 // order to pass indirectly, space on the stack often needs to be allocated
19101 // in order to store the value. In this case the CCInfo.getStackSize()
19102 // != 0 check is not enough and we need to check if any CCValAssign in
19103 // ArgLocs is passed CCValAssign::Indirect.
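// For example, an fp128 argument on RV32 is assigned CCValAssign::Indirect:
// the caller materializes the value in its own frame and passes only its
// address, so the caller's frame cannot simply be torn down for a tail call.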
19104 for (auto &VA : ArgLocs)
19105 if (VA.getLocInfo() == CCValAssign::Indirect)
19106 return false;
19107
19108 // Do not tail call opt if either caller or callee uses struct return
19109 // semantics.
19110 auto IsCallerStructRet = Caller.hasStructRetAttr();
19111 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19112 if (IsCallerStructRet || IsCalleeStructRet)
19113 return false;
19114
19115 // The callee has to preserve all registers the caller needs to preserve.
19116 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19117 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19118 if (CalleeCC != CallerCC) {
19119 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19120 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19121 return false;
19122 }
19123
19124 // Byval parameters hand the function a pointer directly into the stack area
19125 // we want to reuse during a tail call. Working around this *is* possible
19126 // but less efficient and uglier in LowerCall.
19127 for (auto &Arg : Outs)
19128 if (Arg.Flags.isByVal())
19129 return false;
19130
19131 return true;
19132}
19133
19135 return DAG.getDataLayout().getPrefTypeAlign(
19136 VT.getTypeForEVT(*DAG.getContext()));
19137}
19138
19139// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19140// and output parameter nodes.
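// For a non-tail call the DAG built below looks roughly like:
//   callseq_start -> CopyToReg (arguments) -> RISCVISD::CALL
//     -> callseq_end -> CopyFromReg (results), glued together in sequence.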
19142 SmallVectorImpl<SDValue> &InVals) const {
19143 SelectionDAG &DAG = CLI.DAG;
19144 SDLoc &DL = CLI.DL;
19146 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19148 SDValue Chain = CLI.Chain;
19149 SDValue Callee = CLI.Callee;
19150 bool &IsTailCall = CLI.IsTailCall;
19151 CallingConv::ID CallConv = CLI.CallConv;
19152 bool IsVarArg = CLI.IsVarArg;
19153 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19154 MVT XLenVT = Subtarget.getXLenVT();
19155
19157
19158 // Analyze the operands of the call, assigning locations to each operand.
19160 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19161
19162 if (CallConv == CallingConv::GHC) {
19163 if (Subtarget.hasStdExtE())
19164 report_fatal_error("GHC calling convention is not supported on RVE!");
19166 } else
19167 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19169 : RISCV::CC_RISCV);
19170
19171 // Check if it's really possible to do a tail call.
19172 if (IsTailCall)
19173 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19174
19175 if (IsTailCall)
19176 ++NumTailCalls;
19177 else if (CLI.CB && CLI.CB->isMustTailCall())
19178 report_fatal_error("failed to perform tail call elimination on a call "
19179 "site marked musttail");
19180
19181 // Get a count of how many bytes are to be pushed on the stack.
19182 unsigned NumBytes = ArgCCInfo.getStackSize();
19183
19184 // Create local copies for byval args
19185 SmallVector<SDValue, 8> ByValArgs;
19186 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19187 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19188 if (!Flags.isByVal())
19189 continue;
19190
19191 SDValue Arg = OutVals[i];
19192 unsigned Size = Flags.getByValSize();
19193 Align Alignment = Flags.getNonZeroByValAlign();
19194
19195 int FI =
19196 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19197 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19198 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19199
19200 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19201 /*IsVolatile=*/false,
19202 /*AlwaysInline=*/false, IsTailCall,
19204 ByValArgs.push_back(FIPtr);
19205 }
19206
19207 if (!IsTailCall)
19208 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19209
19210 // Copy argument values to their designated locations.
19212 SmallVector<SDValue, 8> MemOpChains;
19213 SDValue StackPtr;
19214 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19215 ++i, ++OutIdx) {
19216 CCValAssign &VA = ArgLocs[i];
19217 SDValue ArgValue = OutVals[OutIdx];
19218 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19219
19220 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19221 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19222 assert(VA.isRegLoc() && "Expected register VA assignment");
19223 assert(VA.needsCustom());
19224 SDValue SplitF64 = DAG.getNode(
19225 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19226 SDValue Lo = SplitF64.getValue(0);
19227 SDValue Hi = SplitF64.getValue(1);
19228
19229 Register RegLo = VA.getLocReg();
19230 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19231
19232 // Get the CCValAssign for the Hi part.
19233 CCValAssign &HiVA = ArgLocs[++i];
19234
19235 if (HiVA.isMemLoc()) {
19236 // Second half of f64 is passed on the stack.
19237 if (!StackPtr.getNode())
19238 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19240 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19241 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19242 // Emit the store.
19243 MemOpChains.push_back(
19244 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19245 } else {
19246 // Second half of f64 is passed in another GPR.
19247 Register RegHigh = HiVA.getLocReg();
19248 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19249 }
19250 continue;
19251 }
19252
19253 // Promote the value if needed.
19254 // For now, only handle fully promoted and indirect arguments.
19255 if (VA.getLocInfo() == CCValAssign::Indirect) {
19256 // Store the argument in a stack slot and pass its address.
19257 Align StackAlign =
19258 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19259 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19260 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19261 // If the original argument was split (e.g. i128), we need
19262 // to store the required parts of it here (and pass just one address).
19263 // Vectors may be partly split to registers and partly to the stack, in
19264 // which case the base address is partly offset and subsequent stores are
19265 // relative to that.
19266 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19267 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19268 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19269 // Calculate the total size to store. There is no way to know it up front
19270 // other than walking the remaining parts and accumulating their sizes and
19271 // alignments.
19273 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19274 SDValue PartValue = OutVals[OutIdx + 1];
19275 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19276 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19277 EVT PartVT = PartValue.getValueType();
19278 if (PartVT.isScalableVector())
19279 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19280 StoredSize += PartVT.getStoreSize();
19281 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19282 Parts.push_back(std::make_pair(PartValue, Offset));
19283 ++i;
19284 ++OutIdx;
19285 }
19286 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19287 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19288 MemOpChains.push_back(
19289 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19291 for (const auto &Part : Parts) {
19292 SDValue PartValue = Part.first;
19293 SDValue PartOffset = Part.second;
19295 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19296 MemOpChains.push_back(
19297 DAG.getStore(Chain, DL, PartValue, Address,
19299 }
19300 ArgValue = SpillSlot;
19301 } else {
19302 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19303 }
19304
19305 // Use local copy if it is a byval arg.
19306 if (Flags.isByVal())
19307 ArgValue = ByValArgs[j++];
19308
19309 if (VA.isRegLoc()) {
19310 // Queue up the argument copies and emit them at the end.
19311 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19312 } else {
19313 assert(VA.isMemLoc() && "Argument not register or memory");
19314 assert(!IsTailCall && "Tail call not allowed if stack is used "
19315 "for passing parameters");
19316
19317 // Work out the address of the stack slot.
19318 if (!StackPtr.getNode())
19319 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19321 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19323
19324 // Emit the store.
19325 MemOpChains.push_back(
19326 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19327 }
19328 }
19329
19330 // Join the stores, which are independent of one another.
19331 if (!MemOpChains.empty())
19332 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19333
19334 SDValue Glue;
19335
19336 // Build a sequence of copy-to-reg nodes, chained and glued together.
19337 for (auto &Reg : RegsToPass) {
19338 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19339 Glue = Chain.getValue(1);
19340 }
19341
19342 // Validate that none of the argument registers have been marked as
19343 // reserved; if so, report an error. Do the same for the return address if
19344 // this is not a tail call.
19345 validateCCReservedRegs(RegsToPass, MF);
19346 if (!IsTailCall &&
19349 MF.getFunction(),
19350 "Return address register required, but has been reserved."});
19351
19352 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19353 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19354 // split it, and so that the direct call can be matched by PseudoCALL.
19355 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19356 const GlobalValue *GV = S->getGlobal();
19357 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19358 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19359 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19360 }
19361
19362 // The first call operand is the chain and the second is the target address.
19364 Ops.push_back(Chain);
19365 Ops.push_back(Callee);
19366
19367 // Add argument registers to the end of the list so that they are
19368 // known live into the call.
19369 for (auto &Reg : RegsToPass)
19370 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19371
19372 if (!IsTailCall) {
19373 // Add a register mask operand representing the call-preserved registers.
19374 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19375 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19376 assert(Mask && "Missing call preserved mask for calling convention");
19377 Ops.push_back(DAG.getRegisterMask(Mask));
19378 }
19379
19380 // Glue the call to the argument copies, if any.
19381 if (Glue.getNode())
19382 Ops.push_back(Glue);
19383
19384 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19385 "Unexpected CFI type for a direct call");
19386
19387 // Emit the call.
19388 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19389
19390 if (IsTailCall) {
19392 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19393 if (CLI.CFIType)
19394 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19395 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19396 return Ret;
19397 }
19398
19399 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19400 if (CLI.CFIType)
19401 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19402 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19403 Glue = Chain.getValue(1);
19404
19405 // Mark the end of the call, which is glued to the call itself.
19406 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19407 Glue = Chain.getValue(1);
19408
19409 // Assign locations to each value returned by this call.
19411 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19412 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19413
19414 // Copy all of the result registers out of their specified physreg.
19415 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19416 auto &VA = RVLocs[i];
19417 // Copy the value out
19418 SDValue RetValue =
19419 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19420 // Glue the RetValue to the end of the call sequence
19421 Chain = RetValue.getValue(1);
19422 Glue = RetValue.getValue(2);
19423
19424 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19425 assert(VA.needsCustom());
19426 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19427 MVT::i32, Glue);
19428 Chain = RetValue2.getValue(1);
19429 Glue = RetValue2.getValue(2);
19430 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19431 RetValue2);
19432 }
19433
19434 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19435
19436 InVals.push_back(RetValue);
19437 }
19438
19439 return Chain;
19440}
19441
19443 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19444 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19446 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19447
19448 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19449
19450 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19451 MVT VT = Outs[i].VT;
19452 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19453 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19454 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19455 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19456 nullptr, *this, Dispatcher))
19457 return false;
19458 }
19459 return true;
19460}
19461
19462SDValue
19464 bool IsVarArg,
19466 const SmallVectorImpl<SDValue> &OutVals,
19467 const SDLoc &DL, SelectionDAG &DAG) const {
19469 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19470
19471 // Stores the assignment of the return value to a location.
19473
19474 // Info about the registers and stack slot.
19475 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19476 *DAG.getContext());
19477
19478 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19479 nullptr, RISCV::CC_RISCV);
19480
19481 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19482 report_fatal_error("GHC functions return void only");
19483
19484 SDValue Glue;
19485 SmallVector<SDValue, 4> RetOps(1, Chain);
19486
19487 // Copy the result values into the output registers.
19488 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19489 SDValue Val = OutVals[OutIdx];
19490 CCValAssign &VA = RVLocs[i];
19491 assert(VA.isRegLoc() && "Can only return in registers!");
19492
19493 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19494 // Handle returning f64 on RV32D with a soft float ABI.
19495 assert(VA.isRegLoc() && "Expected return via registers");
19496 assert(VA.needsCustom());
19497 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19498 DAG.getVTList(MVT::i32, MVT::i32), Val);
19499 SDValue Lo = SplitF64.getValue(0);
19500 SDValue Hi = SplitF64.getValue(1);
19501 Register RegLo = VA.getLocReg();
19502 Register RegHi = RVLocs[++i].getLocReg();
19503
19504 if (STI.isRegisterReservedByUser(RegLo) ||
19505 STI.isRegisterReservedByUser(RegHi))
19507 MF.getFunction(),
19508 "Return value register required, but has been reserved."});
19509
19510 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19511 Glue = Chain.getValue(1);
19512 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19513 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19514 Glue = Chain.getValue(1);
19515 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19516 } else {
19517 // Handle a 'normal' return.
19518 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19519 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19520
19521 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19523 MF.getFunction(),
19524 "Return value register required, but has been reserved."});
19525
19526 // Guarantee that all emitted copies are stuck together.
19527 Glue = Chain.getValue(1);
19528 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19529 }
19530 }
19531
19532 RetOps[0] = Chain; // Update chain.
19533
19534 // Add the glue node if we have it.
19535 if (Glue.getNode()) {
19536 RetOps.push_back(Glue);
19537 }
19538
19539 if (any_of(RVLocs,
19540 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19541 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19542
19543 unsigned RetOpc = RISCVISD::RET_GLUE;
19544 // Interrupt service routines use different return instructions.
19545 const Function &Func = DAG.getMachineFunction().getFunction();
19546 if (Func.hasFnAttribute("interrupt")) {
19547 if (!Func.getReturnType()->isVoidTy())
19548 report_fatal_error(
19549 "Functions with the interrupt attribute must have void return type!");
19550
19552 StringRef Kind =
19553 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19554
19555 if (Kind == "supervisor")
19556 RetOpc = RISCVISD::SRET_GLUE;
19557 else
19558 RetOpc = RISCVISD::MRET_GLUE;
19559 }
19560
19561 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19562}
19563
19564void RISCVTargetLowering::validateCCReservedRegs(
19565 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19566 MachineFunction &MF) const {
19567 const Function &F = MF.getFunction();
19568 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19569
19570 if (llvm::any_of(Regs, [&STI](auto Reg) {
19571 return STI.isRegisterReservedByUser(Reg.first);
19572 }))
19573 F.getContext().diagnose(DiagnosticInfoUnsupported{
19574 F, "Argument register required, but has been reserved."});
19575}
19576
19577// Check if the result of the node is only used as a return value, as
19578// otherwise we can't perform a tail-call.
19580 if (N->getNumValues() != 1)
19581 return false;
19582 if (!N->hasNUsesOfValue(1, 0))
19583 return false;
19584
19585 SDNode *Copy = *N->use_begin();
19586
19587 if (Copy->getOpcode() == ISD::BITCAST) {
19588 return isUsedByReturnOnly(Copy, Chain);
19589 }
19590
19591 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19592 // with soft float ABIs.
19593 if (Copy->getOpcode() != ISD::CopyToReg) {
19594 return false;
19595 }
19596
19597 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19598 // isn't safe to perform a tail call.
19599 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19600 return false;
19601
19602 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19603 bool HasRet = false;
19604 for (SDNode *Node : Copy->uses()) {
19605 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19606 return false;
19607 HasRet = true;
19608 }
19609 if (!HasRet)
19610 return false;
19611
19612 Chain = Copy->getOperand(0);
19613 return true;
19614}
19615
19617 return CI->isTailCall();
19618}
19619
19620const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19621#define NODE_NAME_CASE(NODE) \
19622 case RISCVISD::NODE: \
19623 return "RISCVISD::" #NODE;
19624 // clang-format off
19625 switch ((RISCVISD::NodeType)Opcode) {
19627 break;
19628 NODE_NAME_CASE(RET_GLUE)
19629 NODE_NAME_CASE(SRET_GLUE)
19630 NODE_NAME_CASE(MRET_GLUE)
19631 NODE_NAME_CASE(CALL)
19632 NODE_NAME_CASE(SELECT_CC)
19633 NODE_NAME_CASE(BR_CC)
19634 NODE_NAME_CASE(BuildPairF64)
19635 NODE_NAME_CASE(SplitF64)
19636 NODE_NAME_CASE(TAIL)
19637 NODE_NAME_CASE(ADD_LO)
19638 NODE_NAME_CASE(HI)
19639 NODE_NAME_CASE(LLA)
19640 NODE_NAME_CASE(ADD_TPREL)
19641 NODE_NAME_CASE(MULHSU)
19642 NODE_NAME_CASE(SHL_ADD)
19643 NODE_NAME_CASE(SLLW)
19644 NODE_NAME_CASE(SRAW)
19645 NODE_NAME_CASE(SRLW)
19646 NODE_NAME_CASE(DIVW)
19647 NODE_NAME_CASE(DIVUW)
19648 NODE_NAME_CASE(REMUW)
19649 NODE_NAME_CASE(ROLW)
19650 NODE_NAME_CASE(RORW)
19651 NODE_NAME_CASE(CLZW)
19652 NODE_NAME_CASE(CTZW)
19653 NODE_NAME_CASE(ABSW)
19654 NODE_NAME_CASE(FMV_H_X)
19655 NODE_NAME_CASE(FMV_X_ANYEXTH)
19656 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19657 NODE_NAME_CASE(FMV_W_X_RV64)
19658 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19659 NODE_NAME_CASE(FCVT_X)
19660 NODE_NAME_CASE(FCVT_XU)
19661 NODE_NAME_CASE(FCVT_W_RV64)
19662 NODE_NAME_CASE(FCVT_WU_RV64)
19663 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19664 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19665 NODE_NAME_CASE(FP_ROUND_BF16)
19666 NODE_NAME_CASE(FP_EXTEND_BF16)
19667 NODE_NAME_CASE(FROUND)
19668 NODE_NAME_CASE(FCLASS)
19669 NODE_NAME_CASE(FMAX)
19670 NODE_NAME_CASE(FMIN)
19671 NODE_NAME_CASE(READ_COUNTER_WIDE)
19672 NODE_NAME_CASE(BREV8)
19673 NODE_NAME_CASE(ORC_B)
19674 NODE_NAME_CASE(ZIP)
19675 NODE_NAME_CASE(UNZIP)
19676 NODE_NAME_CASE(CLMUL)
19677 NODE_NAME_CASE(CLMULH)
19678 NODE_NAME_CASE(CLMULR)
19679 NODE_NAME_CASE(MOPR)
19680 NODE_NAME_CASE(MOPRR)
19681 NODE_NAME_CASE(SHA256SIG0)
19682 NODE_NAME_CASE(SHA256SIG1)
19683 NODE_NAME_CASE(SHA256SUM0)
19684 NODE_NAME_CASE(SHA256SUM1)
19685 NODE_NAME_CASE(SM4KS)
19686 NODE_NAME_CASE(SM4ED)
19687 NODE_NAME_CASE(SM3P0)
19688 NODE_NAME_CASE(SM3P1)
19689 NODE_NAME_CASE(TH_LWD)
19690 NODE_NAME_CASE(TH_LWUD)
19691 NODE_NAME_CASE(TH_LDD)
19692 NODE_NAME_CASE(TH_SWD)
19693 NODE_NAME_CASE(TH_SDD)
19694 NODE_NAME_CASE(VMV_V_V_VL)
19695 NODE_NAME_CASE(VMV_V_X_VL)
19696 NODE_NAME_CASE(VFMV_V_F_VL)
19697 NODE_NAME_CASE(VMV_X_S)
19698 NODE_NAME_CASE(VMV_S_X_VL)
19699 NODE_NAME_CASE(VFMV_S_F_VL)
19700 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19701 NODE_NAME_CASE(READ_VLENB)
19702 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19703 NODE_NAME_CASE(VSLIDEUP_VL)
19704 NODE_NAME_CASE(VSLIDE1UP_VL)
19705 NODE_NAME_CASE(VSLIDEDOWN_VL)
19706 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19707 NODE_NAME_CASE(VFSLIDE1UP_VL)
19708 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19709 NODE_NAME_CASE(VID_VL)
19710 NODE_NAME_CASE(VFNCVT_ROD_VL)
19711 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19712 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19713 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19714 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19715 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19716 NODE_NAME_CASE(VECREDUCE_AND_VL)
19717 NODE_NAME_CASE(VECREDUCE_OR_VL)
19718 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19719 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19720 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19721 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19722 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19723 NODE_NAME_CASE(ADD_VL)
19724 NODE_NAME_CASE(AND_VL)
19725 NODE_NAME_CASE(MUL_VL)
19726 NODE_NAME_CASE(OR_VL)
19727 NODE_NAME_CASE(SDIV_VL)
19728 NODE_NAME_CASE(SHL_VL)
19729 NODE_NAME_CASE(SREM_VL)
19730 NODE_NAME_CASE(SRA_VL)
19731 NODE_NAME_CASE(SRL_VL)
19732 NODE_NAME_CASE(ROTL_VL)
19733 NODE_NAME_CASE(ROTR_VL)
19734 NODE_NAME_CASE(SUB_VL)
19735 NODE_NAME_CASE(UDIV_VL)
19736 NODE_NAME_CASE(UREM_VL)
19737 NODE_NAME_CASE(XOR_VL)
19738 NODE_NAME_CASE(AVGFLOORU_VL)
19739 NODE_NAME_CASE(AVGCEILU_VL)
19740 NODE_NAME_CASE(SADDSAT_VL)
19741 NODE_NAME_CASE(UADDSAT_VL)
19742 NODE_NAME_CASE(SSUBSAT_VL)
19743 NODE_NAME_CASE(USUBSAT_VL)
19744 NODE_NAME_CASE(FADD_VL)
19745 NODE_NAME_CASE(FSUB_VL)
19746 NODE_NAME_CASE(FMUL_VL)
19747 NODE_NAME_CASE(FDIV_VL)
19748 NODE_NAME_CASE(FNEG_VL)
19749 NODE_NAME_CASE(FABS_VL)
19750 NODE_NAME_CASE(FSQRT_VL)
19751 NODE_NAME_CASE(FCLASS_VL)
19752 NODE_NAME_CASE(VFMADD_VL)
19753 NODE_NAME_CASE(VFNMADD_VL)
19754 NODE_NAME_CASE(VFMSUB_VL)
19755 NODE_NAME_CASE(VFNMSUB_VL)
19756 NODE_NAME_CASE(VFWMADD_VL)
19757 NODE_NAME_CASE(VFWNMADD_VL)
19758 NODE_NAME_CASE(VFWMSUB_VL)
19759 NODE_NAME_CASE(VFWNMSUB_VL)
19760 NODE_NAME_CASE(FCOPYSIGN_VL)
19761 NODE_NAME_CASE(SMIN_VL)
19762 NODE_NAME_CASE(SMAX_VL)
19763 NODE_NAME_CASE(UMIN_VL)
19764 NODE_NAME_CASE(UMAX_VL)
19765 NODE_NAME_CASE(BITREVERSE_VL)
19766 NODE_NAME_CASE(BSWAP_VL)
19767 NODE_NAME_CASE(CTLZ_VL)
19768 NODE_NAME_CASE(CTTZ_VL)
19769 NODE_NAME_CASE(CTPOP_VL)
19770 NODE_NAME_CASE(VFMIN_VL)
19771 NODE_NAME_CASE(VFMAX_VL)
19772 NODE_NAME_CASE(MULHS_VL)
19773 NODE_NAME_CASE(MULHU_VL)
19774 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19775 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19776 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19777 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19778 NODE_NAME_CASE(VFCVT_X_F_VL)
19779 NODE_NAME_CASE(VFCVT_XU_F_VL)
19780 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19781 NODE_NAME_CASE(SINT_TO_FP_VL)
19782 NODE_NAME_CASE(UINT_TO_FP_VL)
19783 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19784 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19785 NODE_NAME_CASE(FP_EXTEND_VL)
19786 NODE_NAME_CASE(FP_ROUND_VL)
19787 NODE_NAME_CASE(STRICT_FADD_VL)
19788 NODE_NAME_CASE(STRICT_FSUB_VL)
19789 NODE_NAME_CASE(STRICT_FMUL_VL)
19790 NODE_NAME_CASE(STRICT_FDIV_VL)
19791 NODE_NAME_CASE(STRICT_FSQRT_VL)
19792 NODE_NAME_CASE(STRICT_VFMADD_VL)
19793 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19794 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19795 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19796 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19797 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19798 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19799 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19800 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19801 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19802 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19803 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19804 NODE_NAME_CASE(STRICT_FSETCC_VL)
19805 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19806 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19807 NODE_NAME_CASE(VWMUL_VL)
19808 NODE_NAME_CASE(VWMULU_VL)
19809 NODE_NAME_CASE(VWMULSU_VL)
19810 NODE_NAME_CASE(VWADD_VL)
19811 NODE_NAME_CASE(VWADDU_VL)
19812 NODE_NAME_CASE(VWSUB_VL)
19813 NODE_NAME_CASE(VWSUBU_VL)
19814 NODE_NAME_CASE(VWADD_W_VL)
19815 NODE_NAME_CASE(VWADDU_W_VL)
19816 NODE_NAME_CASE(VWSUB_W_VL)
19817 NODE_NAME_CASE(VWSUBU_W_VL)
19818 NODE_NAME_CASE(VWSLL_VL)
19819 NODE_NAME_CASE(VFWMUL_VL)
19820 NODE_NAME_CASE(VFWADD_VL)
19821 NODE_NAME_CASE(VFWSUB_VL)
19822 NODE_NAME_CASE(VFWADD_W_VL)
19823 NODE_NAME_CASE(VFWSUB_W_VL)
19824 NODE_NAME_CASE(VWMACC_VL)
19825 NODE_NAME_CASE(VWMACCU_VL)
19826 NODE_NAME_CASE(VWMACCSU_VL)
19827 NODE_NAME_CASE(VNSRL_VL)
19828 NODE_NAME_CASE(SETCC_VL)
19829 NODE_NAME_CASE(VMERGE_VL)
19830 NODE_NAME_CASE(VMAND_VL)
19831 NODE_NAME_CASE(VMOR_VL)
19832 NODE_NAME_CASE(VMXOR_VL)
19833 NODE_NAME_CASE(VMCLR_VL)
19834 NODE_NAME_CASE(VMSET_VL)
19835 NODE_NAME_CASE(VRGATHER_VX_VL)
19836 NODE_NAME_CASE(VRGATHER_VV_VL)
19837 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19838 NODE_NAME_CASE(VSEXT_VL)
19839 NODE_NAME_CASE(VZEXT_VL)
19840 NODE_NAME_CASE(VCPOP_VL)
19841 NODE_NAME_CASE(VFIRST_VL)
19842 NODE_NAME_CASE(READ_CSR)
19843 NODE_NAME_CASE(WRITE_CSR)
19844 NODE_NAME_CASE(SWAP_CSR)
19845 NODE_NAME_CASE(CZERO_EQZ)
19846 NODE_NAME_CASE(CZERO_NEZ)
19847 NODE_NAME_CASE(SF_VC_XV_SE)
19848 NODE_NAME_CASE(SF_VC_IV_SE)
19849 NODE_NAME_CASE(SF_VC_VV_SE)
19850 NODE_NAME_CASE(SF_VC_FV_SE)
19851 NODE_NAME_CASE(SF_VC_XVV_SE)
19852 NODE_NAME_CASE(SF_VC_IVV_SE)
19853 NODE_NAME_CASE(SF_VC_VVV_SE)
19854 NODE_NAME_CASE(SF_VC_FVV_SE)
19855 NODE_NAME_CASE(SF_VC_XVW_SE)
19856 NODE_NAME_CASE(SF_VC_IVW_SE)
19857 NODE_NAME_CASE(SF_VC_VVW_SE)
19858 NODE_NAME_CASE(SF_VC_FVW_SE)
19859 NODE_NAME_CASE(SF_VC_V_X_SE)
19860 NODE_NAME_CASE(SF_VC_V_I_SE)
19861 NODE_NAME_CASE(SF_VC_V_XV_SE)
19862 NODE_NAME_CASE(SF_VC_V_IV_SE)
19863 NODE_NAME_CASE(SF_VC_V_VV_SE)
19864 NODE_NAME_CASE(SF_VC_V_FV_SE)
19865 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19866 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19867 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19868 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19869 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19870 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19871 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19872 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19873 }
19874 // clang-format on
19875 return nullptr;
19876#undef NODE_NAME_CASE
19877}
19878
19879/// getConstraintType - Given a constraint letter, return the type of
19880/// constraint it is for this target.
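// For example, 'f' yields C_RegisterClass, 'I'/'J'/'K' yield C_Immediate,
// 'A' yields C_Memory, and the multi-letter "vr"/"vm" constraints also
// yield C_RegisterClass.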
19883 if (Constraint.size() == 1) {
19884 switch (Constraint[0]) {
19885 default:
19886 break;
19887 case 'f':
19888 return C_RegisterClass;
19889 case 'I':
19890 case 'J':
19891 case 'K':
19892 return C_Immediate;
19893 case 'A':
19894 return C_Memory;
19895 case 's':
19896 case 'S': // A symbolic address
19897 return C_Other;
19898 }
19899 } else {
19900 if (Constraint == "vr" || Constraint == "vm")
19901 return C_RegisterClass;
19902 }
19903 return TargetLowering::getConstraintType(Constraint);
19904}
19905
19906std::pair<unsigned, const TargetRegisterClass *>
19908 StringRef Constraint,
19909 MVT VT) const {
19910 // First, see if this is a constraint that directly corresponds to a RISC-V
19911 // register class.
19912 if (Constraint.size() == 1) {
19913 switch (Constraint[0]) {
19914 case 'r':
19915 // TODO: Support fixed vectors up to XLen for P extension?
19916 if (VT.isVector())
19917 break;
19918 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19919 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19920 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19921 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19922 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19923 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19924 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19925 case 'f':
19926 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19927 return std::make_pair(0U, &RISCV::FPR16RegClass);
19928 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19929 return std::make_pair(0U, &RISCV::FPR32RegClass);
19930 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19931 return std::make_pair(0U, &RISCV::FPR64RegClass);
19932 break;
19933 default:
19934 break;
19935 }
19936 } else if (Constraint == "vr") {
19937 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19938 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19939 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19940 return std::make_pair(0U, RC);
19941 }
19942 } else if (Constraint == "vm") {
19943 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19944 return std::make_pair(0U, &RISCV::VMV0RegClass);
19945 }
19946
19947 // Clang will correctly decode the usage of register name aliases into their
19948 // official names. However, other frontends like `rustc` do not. This allows
19949 // users of these frontends to use the ABI names for registers in LLVM-style
19950 // register constraints.
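// For example, both "{s0}" and "{fp}" are accepted below and resolve to X8.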
19951 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19952 .Case("{zero}", RISCV::X0)
19953 .Case("{ra}", RISCV::X1)
19954 .Case("{sp}", RISCV::X2)
19955 .Case("{gp}", RISCV::X3)
19956 .Case("{tp}", RISCV::X4)
19957 .Case("{t0}", RISCV::X5)
19958 .Case("{t1}", RISCV::X6)
19959 .Case("{t2}", RISCV::X7)
19960 .Cases("{s0}", "{fp}", RISCV::X8)
19961 .Case("{s1}", RISCV::X9)
19962 .Case("{a0}", RISCV::X10)
19963 .Case("{a1}", RISCV::X11)
19964 .Case("{a2}", RISCV::X12)
19965 .Case("{a3}", RISCV::X13)
19966 .Case("{a4}", RISCV::X14)
19967 .Case("{a5}", RISCV::X15)
19968 .Case("{a6}", RISCV::X16)
19969 .Case("{a7}", RISCV::X17)
19970 .Case("{s2}", RISCV::X18)
19971 .Case("{s3}", RISCV::X19)
19972 .Case("{s4}", RISCV::X20)
19973 .Case("{s5}", RISCV::X21)
19974 .Case("{s6}", RISCV::X22)
19975 .Case("{s7}", RISCV::X23)
19976 .Case("{s8}", RISCV::X24)
19977 .Case("{s9}", RISCV::X25)
19978 .Case("{s10}", RISCV::X26)
19979 .Case("{s11}", RISCV::X27)
19980 .Case("{t3}", RISCV::X28)
19981 .Case("{t4}", RISCV::X29)
19982 .Case("{t5}", RISCV::X30)
19983 .Case("{t6}", RISCV::X31)
19984 .Default(RISCV::NoRegister);
19985 if (XRegFromAlias != RISCV::NoRegister)
19986 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19987
19988 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
19989 // TableGen record rather than the AsmName to choose registers for InlineAsm
19990 // constraints, plus we want to match those names to the widest floating point
19991 // register type available, manually select floating point registers here.
19992 //
19993 // The second case is the ABI name of the register, so that frontends can also
19994 // use the ABI names in register constraint lists.
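// For example, "{f10}" and "{fa0}" both resolve to the same physical
// register: with the D extension and an f64 (or unspecified) type the FPR64
// form F10_D is returned, otherwise the FPR32 form F10_F, or F10_H for f16
// with Zfhmin.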
19995 if (Subtarget.hasStdExtF()) {
19996 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19997 .Cases("{f0}", "{ft0}", RISCV::F0_F)
19998 .Cases("{f1}", "{ft1}", RISCV::F1_F)
19999 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20000 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20001 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20002 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20003 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20004 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20005 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20006 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20007 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20008 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20009 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20010 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20011 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20012 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20013 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20014 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20015 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20016 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20017 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20018 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20019 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20020 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20021 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20022 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20023 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20024 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20025 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20026 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20027 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20028 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20029 .Default(RISCV::NoRegister);
20030 if (FReg != RISCV::NoRegister) {
20031 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20032 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20033 unsigned RegNo = FReg - RISCV::F0_F;
20034 unsigned DReg = RISCV::F0_D + RegNo;
20035 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20036 }
20037 if (VT == MVT::f32 || VT == MVT::Other)
20038 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20039 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20040 unsigned RegNo = FReg - RISCV::F0_F;
20041 unsigned HReg = RISCV::F0_H + RegNo;
20042 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20043 }
20044 }
20045 }
20046
20047 if (Subtarget.hasVInstructions()) {
20048 Register VReg = StringSwitch<Register>(Constraint.lower())
20049 .Case("{v0}", RISCV::V0)
20050 .Case("{v1}", RISCV::V1)
20051 .Case("{v2}", RISCV::V2)
20052 .Case("{v3}", RISCV::V3)
20053 .Case("{v4}", RISCV::V4)
20054 .Case("{v5}", RISCV::V5)
20055 .Case("{v6}", RISCV::V6)
20056 .Case("{v7}", RISCV::V7)
20057 .Case("{v8}", RISCV::V8)
20058 .Case("{v9}", RISCV::V9)
20059 .Case("{v10}", RISCV::V10)
20060 .Case("{v11}", RISCV::V11)
20061 .Case("{v12}", RISCV::V12)
20062 .Case("{v13}", RISCV::V13)
20063 .Case("{v14}", RISCV::V14)
20064 .Case("{v15}", RISCV::V15)
20065 .Case("{v16}", RISCV::V16)
20066 .Case("{v17}", RISCV::V17)
20067 .Case("{v18}", RISCV::V18)
20068 .Case("{v19}", RISCV::V19)
20069 .Case("{v20}", RISCV::V20)
20070 .Case("{v21}", RISCV::V21)
20071 .Case("{v22}", RISCV::V22)
20072 .Case("{v23}", RISCV::V23)
20073 .Case("{v24}", RISCV::V24)
20074 .Case("{v25}", RISCV::V25)
20075 .Case("{v26}", RISCV::V26)
20076 .Case("{v27}", RISCV::V27)
20077 .Case("{v28}", RISCV::V28)
20078 .Case("{v29}", RISCV::V29)
20079 .Case("{v30}", RISCV::V30)
20080 .Case("{v31}", RISCV::V31)
20081 .Default(RISCV::NoRegister);
20082 if (VReg != RISCV::NoRegister) {
20083 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20084 return std::make_pair(VReg, &RISCV::VMRegClass);
20085 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20086 return std::make_pair(VReg, &RISCV::VRRegClass);
20087 for (const auto *RC :
20088 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20089 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20090 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20091 return std::make_pair(VReg, RC);
20092 }
20093 }
20094 }
20095 }
20096
20097 std::pair<Register, const TargetRegisterClass *> Res =
20099
20100 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20101 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20102 // Subtarget into account.
20103 if (Res.second == &RISCV::GPRF16RegClass ||
20104 Res.second == &RISCV::GPRF32RegClass ||
20105 Res.second == &RISCV::GPRPairRegClass)
20106 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20107
20108 return Res;
20109}
20110
20113 // Currently only support length 1 constraints.
20114 if (ConstraintCode.size() == 1) {
20115 switch (ConstraintCode[0]) {
20116 case 'A':
20118 default:
20119 break;
20120 }
20121 }
20122
20123 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20124}
20125
20127 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20128 SelectionDAG &DAG) const {
20129 // Currently only support length 1 constraints.
20130 if (Constraint.size() == 1) {
20131 switch (Constraint[0]) {
20132 case 'I':
20133 // Validate & create a 12-bit signed immediate operand.
20134 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20135 uint64_t CVal = C->getSExtValue();
20136 if (isInt<12>(CVal))
20137 Ops.push_back(
20138 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20139 }
20140 return;
20141 case 'J':
20142 // Validate & create an integer zero operand.
20143 if (isNullConstant(Op))
20144 Ops.push_back(
20145 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20146 return;
20147 case 'K':
20148 // Validate & create a 5-bit unsigned immediate operand.
20149 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20150 uint64_t CVal = C->getZExtValue();
20151 if (isUInt<5>(CVal))
20152 Ops.push_back(
20153 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20154 }
20155 return;
20156 case 'S':
20158 return;
20159 default:
20160 break;
20161 }
20162 }
20163 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20164}
20165
20167 Instruction *Inst,
20168 AtomicOrdering Ord) const {
20169 if (Subtarget.hasStdExtZtso()) {
20170 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20171 return Builder.CreateFence(Ord);
20172 return nullptr;
20173 }
20174
20175 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20176 return Builder.CreateFence(Ord);
20177 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20178 return Builder.CreateFence(AtomicOrdering::Release);
20179 return nullptr;
20180}
20181
20183 Instruction *Inst,
20184 AtomicOrdering Ord) const {
20185 if (Subtarget.hasStdExtZtso()) {
20186 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20187 return Builder.CreateFence(Ord);
20188 return nullptr;
20189 }
20190
20191 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20192 return Builder.CreateFence(AtomicOrdering::Acquire);
20193 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20196 return nullptr;
20197}
20198
20201 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20202 // point operations can't be used in an lr/sc sequence without breaking the
20203 // forward-progress guarantee.
20204 if (AI->isFloatingPointOperation() ||
20208
20209 // Don't expand forced atomics; we want to have __sync libcalls instead.
20210 if (Subtarget.hasForcedAtomics())
20212
20213 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20214 if (AI->getOperation() == AtomicRMWInst::Nand) {
20215 if (Subtarget.hasStdExtZacas() &&
20216 (Size >= 32 || Subtarget.hasStdExtZabha()))
20218 if (Size < 32)
20220 }
20221
20222 if (Size < 32 && !Subtarget.hasStdExtZabha())
20224
20226}
20227
20228static Intrinsic::ID
20230 if (XLen == 32) {
20231 switch (BinOp) {
20232 default:
20233 llvm_unreachable("Unexpected AtomicRMW BinOp");
20235 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20236 case AtomicRMWInst::Add:
20237 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20238 case AtomicRMWInst::Sub:
20239 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20241 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20242 case AtomicRMWInst::Max:
20243 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20244 case AtomicRMWInst::Min:
20245 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20247 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20249 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20250 }
20251 }
20252
20253 if (XLen == 64) {
20254 switch (BinOp) {
20255 default:
20256 llvm_unreachable("Unexpected AtomicRMW BinOp");
20258 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20259 case AtomicRMWInst::Add:
20260 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20261 case AtomicRMWInst::Sub:
20262 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20264 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20265 case AtomicRMWInst::Max:
20266 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20267 case AtomicRMWInst::Min:
20268 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20270 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20272 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20273 }
20274 }
20275
20276 llvm_unreachable("Unexpected XLen\n");
20277}
20278
20280 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20281 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20282 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20283 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20284 // mask, as this produces better code than the LR/SC loop emitted by
20285 // int_riscv_masked_atomicrmw_xchg.
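// For example, an 8-bit "atomicrmw xchg ..., 0" becomes an
// AtomicRMWInst::And of the containing aligned word with ~Mask (clearing
// just the addressed byte), and an exchange with -1 becomes an
// AtomicRMWInst::Or with Mask.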
20286 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20287 isa<ConstantInt>(AI->getValOperand())) {
20288 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20289 if (CVal->isZero())
20290 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20291 Builder.CreateNot(Mask, "Inv_Mask"),
20292 AI->getAlign(), Ord);
20293 if (CVal->isMinusOne())
20294 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20295 AI->getAlign(), Ord);
20296 }
20297
20298 unsigned XLen = Subtarget.getXLen();
20299 Value *Ordering =
20300 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20301 Type *Tys[] = {AlignedAddr->getType()};
20302 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20303 AI->getModule(),
20304 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20305
20306 if (XLen == 64) {
20307 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20308 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20309 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20310 }
20311
20312 Value *Result;
20313
20314 // Must pass the shift amount needed to sign extend the loaded value prior
20315 // to performing a signed comparison for min/max. ShiftAmt is the number of
20316 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20317 // is the number of bits to left+right shift the value in order to
20318 // sign-extend.
20319 if (AI->getOperation() == AtomicRMWInst::Min ||
20320 AI->getOperation() == AtomicRMWInst::Max) {
20321 const DataLayout &DL = AI->getModule()->getDataLayout();
20322 unsigned ValWidth =
20323 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20324 Value *SextShamt =
20325 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20326 Result = Builder.CreateCall(LrwOpScwLoop,
20327 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20328 } else {
20329 Result =
20330 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20331 }
20332
20333 if (XLen == 64)
20334 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20335 return Result;
20336}
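// A standalone sketch (not LLVM code; names are illustrative) of the shift
// amount passed to the masked min/max LR/SC loop above: for a ValWidth-bit
// field that sits ShiftAmt bits into the aligned XLen-bit word, shifting left
// and then arithmetically right by XLen - ValWidth - ShiftAmt bits
// sign-extends the field in place before the signed comparison.
#include <cassert>

unsigned sextShamt(unsigned XLen, unsigned ValWidth, unsigned ShiftAmt) {
  return XLen - ValWidth - ShiftAmt;
}

int main() {
  // An i8 element at byte offset 3 of an aligned 64-bit word: ShiftAmt = 24,
  // so the loaded field must travel 64 - 8 - 24 = 32 bits up and back down.
  assert(sextShamt(/*XLen=*/64, /*ValWidth=*/8, /*ShiftAmt=*/24) == 32);
  assert(sextShamt(/*XLen=*/32, /*ValWidth=*/16, /*ShiftAmt=*/16) == 0);
  return 0;
}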
20337
20338TargetLowering::AtomicExpansionKind
20339RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20340 AtomicCmpXchgInst *CI) const {
20341 // Don't expand forced atomics, we want to have __sync libcalls instead.
20342 if (Subtarget.hasForcedAtomics())
20343 return AtomicExpansionKind::None;
20344
20345 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20346 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20347 (Size == 8 || Size == 16))
20348 return AtomicExpansionKind::MaskedIntrinsic;
20349 return AtomicExpansionKind::None;
20350}
20351
20352Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20353 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20354 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20355 unsigned XLen = Subtarget.getXLen();
20356 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20357 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20358 if (XLen == 64) {
20359 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20360 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20361 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20362 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20363 }
20364 Type *Tys[] = {AlignedAddr->getType()};
20365 Function *MaskedCmpXchg =
20366 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20367 Value *Result = Builder.CreateCall(
20368 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20369 if (XLen == 64)
20370 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20371 return Result;
20372}
20373
20374bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20375 EVT DataVT) const {
20376 // We have indexed loads for all supported EEW types. Indices are always
20377 // zero extended.
20378 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20379 isTypeLegal(Extend.getValueType()) &&
20380 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20381 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20382}
20383
20384bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20385 EVT VT) const {
20386 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20387 return false;
20388
20389 switch (FPVT.getSimpleVT().SimpleTy) {
20390 case MVT::f16:
20391 return Subtarget.hasStdExtZfhmin();
20392 case MVT::f32:
20393 return Subtarget.hasStdExtF();
20394 case MVT::f64:
20395 return Subtarget.hasStdExtD();
20396 default:
20397 return false;
20398 }
20399}
20400
20401unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20402 // If we are using the small code model, we can reduce size of jump table
20403 // entry to 4 bytes.
20404 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20405 getTargetMachine().getCodeModel() == CodeModel::Small) {
20406 return MachineJumpTableInfo::EK_Custom32;
20407 }
20408 return TargetLowering::getJumpTableEncoding();
20409}
20410
20411const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20412 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20413 unsigned uid, MCContext &Ctx) const {
20414 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20415 getTargetMachine().getCodeModel() == CodeModel::Small);
20416 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20417}
20418
20419bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20420 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20421 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20422 // a power of two as well.
20423 // FIXME: This doesn't work for zve32, but that's already broken
20424 // elsewhere for the same reason.
20425 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20426 static_assert(RISCV::RVVBitsPerBlock == 64,
20427 "RVVBitsPerBlock changed, audit needed");
20428 return true;
20429}
20430
20431bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20432 SDValue &Offset,
20433 ISD::MemIndexedMode &AM,
20434 SelectionDAG &DAG) const {
20435 // Target does not support indexed loads.
20436 if (!Subtarget.hasVendorXTHeadMemIdx())
20437 return false;
20438
20439 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20440 return false;
20441
20442 Base = Op->getOperand(0);
20443 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20444 int64_t RHSC = RHS->getSExtValue();
20445 if (Op->getOpcode() == ISD::SUB)
20446 RHSC = -(uint64_t)RHSC;
20447
20448 // The constants that can be encoded in the THeadMemIdx instructions
20449 // are of the form (sign_extend(imm5) << imm2).
20450 bool isLegalIndexedOffset = false;
20451 for (unsigned i = 0; i < 4; i++)
20452 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20453 isLegalIndexedOffset = true;
20454 break;
20455 }
20456
20457 if (!isLegalIndexedOffset)
20458 return false;
20459
20460 Offset = Op->getOperand(1);
20461 return true;
20462 }
20463
20464 return false;
20465}
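// A standalone sketch (not LLVM code) of the offset legality rule used above:
// XTHeadMemIdx pre/post-increment offsets must have the form
// sign_extend(imm5) << imm2, i.e. a signed 5-bit value scaled by 1, 2, 4 or 8.
#include <cassert>
#include <cstdint>

bool isLegalTHeadMemIdxOffset(int64_t Off) {
  for (unsigned Shift = 0; Shift < 4; ++Shift) {
    if (Off % (int64_t(1) << Shift) != 0)
      continue;
    int64_t Imm5 = Off >> Shift;
    if (Imm5 >= -16 && Imm5 <= 15) // fits a signed 5-bit field
      return true;
  }
  return false;
}

int main() {
  assert(isLegalTHeadMemIdxOffset(8));    // 1 << 3
  assert(isLegalTHeadMemIdxOffset(-32));  // -4 << 3
  assert(isLegalTHeadMemIdxOffset(120));  // 15 << 3
  assert(!isLegalTHeadMemIdxOffset(121)); // not representable
  return 0;
}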
20466
20467bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20468 SDValue &Offset,
20469 ISD::MemIndexedMode &AM,
20470 SelectionDAG &DAG) const {
20471 EVT VT;
20472 SDValue Ptr;
20473 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20474 VT = LD->getMemoryVT();
20475 Ptr = LD->getBasePtr();
20476 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20477 VT = ST->getMemoryVT();
20478 Ptr = ST->getBasePtr();
20479 } else
20480 return false;
20481
20482 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20483 return false;
20484
20485 AM = ISD::PRE_INC;
20486 return true;
20487}
20488
20489bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
20490 SDValue &Base,
20491 SDValue &Offset,
20492 ISD::MemIndexedMode &AM,
20493 SelectionDAG &DAG) const {
20494 EVT VT;
20495 SDValue Ptr;
20496 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20497 VT = LD->getMemoryVT();
20498 Ptr = LD->getBasePtr();
20499 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20500 VT = ST->getMemoryVT();
20501 Ptr = ST->getBasePtr();
20502 } else
20503 return false;
20504
20505 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20506 return false;
20507 // Post-indexing updates the base, so it's not a valid transform
20508 // if that's not the same as the load's pointer.
20509 if (Ptr != Base)
20510 return false;
20511
20512 AM = ISD::POST_INC;
20513 return true;
20514}
20515
20516bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20517 EVT VT) const {
20518 EVT SVT = VT.getScalarType();
20519
20520 if (!SVT.isSimple())
20521 return false;
20522
20523 switch (SVT.getSimpleVT().SimpleTy) {
20524 case MVT::f16:
20525 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20526 : Subtarget.hasStdExtZfhOrZhinx();
20527 case MVT::f32:
20528 return Subtarget.hasStdExtFOrZfinx();
20529 case MVT::f64:
20530 return Subtarget.hasStdExtDOrZdinx();
20531 default:
20532 break;
20533 }
20534
20535 return false;
20536}
20537
20538ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20539 // Zacas will use amocas.w which does not require extension.
20540 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20541}
20542
20543Register RISCVTargetLowering::getExceptionPointerRegister(
20544 const Constant *PersonalityFn) const {
20545 return RISCV::X10;
20546}
20547
20548Register RISCVTargetLowering::getExceptionSelectorRegister(
20549 const Constant *PersonalityFn) const {
20550 return RISCV::X11;
20551}
20552
20553bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20554 // Return false to suppress the unnecessary extensions if the LibCall
20555 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20556 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20557 Type.getSizeInBits() < Subtarget.getXLen()))
20558 return false;
20559
20560 return true;
20561}
20562
20563bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20564 if (Subtarget.is64Bit() && Type == MVT::i32)
20565 return true;
20566
20567 return IsSigned;
20568}
20569
20570bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20571 SDValue C) const {
20572 // Check integral scalar types.
20573 const bool HasExtMOrZmmul =
20574 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20575 if (!VT.isScalarInteger())
20576 return false;
20577
20578 // Omit the optimization if the subtarget has the M extension and the data
20579 // size exceeds XLen.
20580 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20581 return false;
20582
20583 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20584 // Break the MUL to a SLLI and an ADD/SUB.
20585 const APInt &Imm = ConstNode->getAPIntValue();
20586 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20587 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20588 return true;
20589
20590 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20591 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20592 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20593 (Imm - 8).isPowerOf2()))
20594 return true;
20595
20596 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20597 // a pair of LUI/ADDI.
20598 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20599 ConstNode->hasOneUse()) {
20600 APInt ImmS = Imm.ashr(Imm.countr_zero());
20601 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20602 (1 - ImmS).isPowerOf2())
20603 return true;
20604 }
20605 }
20606
20607 return false;
20608}
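// A standalone sketch (not LLVM code) of the first check above: a multiply by
// C is worth decomposing when C-1, C+1, 1-C or -1-C is a power of two, since
// it then becomes a single shift plus one add/sub, e.g. 9*x = (x << 3) + x
// and 7*x = (x << 3) - x.
#include <cassert>
#include <cstdint>

bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

bool isShiftPlusAddSub(int64_t C) {
  return isPow2(uint64_t(C - 1)) || isPow2(uint64_t(C + 1)) ||
         isPow2(uint64_t(1 - C)) || isPow2(uint64_t(-1 - C));
}

int main() {
  assert(isShiftPlusAddSub(9));   // (x << 3) + x
  assert(isShiftPlusAddSub(7));   // (x << 3) - x
  assert(isShiftPlusAddSub(-3));  // x - (x << 2)
  assert(!isShiftPlusAddSub(10)); // not a single shift+add/sub
  return 0;
}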
20609
20610bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20611 SDValue ConstNode) const {
20612 // Let the DAGCombiner decide for vectors.
20613 EVT VT = AddNode.getValueType();
20614 if (VT.isVector())
20615 return true;
20616
20617 // Let the DAGCombiner decide for larger types.
20618 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20619 return true;
20620
20621 // It is worse if c1 is simm12 while c1*c2 is not.
20622 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20623 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20624 const APInt &C1 = C1Node->getAPIntValue();
20625 const APInt &C2 = C2Node->getAPIntValue();
20626 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20627 return false;
20628
20629 // Default to true and let the DAGCombiner decide.
20630 return true;
20631}
20632
20633bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20634 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20635 unsigned *Fast) const {
20636 if (!VT.isVector()) {
20637 if (Fast)
20638 *Fast = Subtarget.enableUnalignedScalarMem();
20639 return Subtarget.enableUnalignedScalarMem();
20640 }
20641
20642 // All vector implementations must support element alignment
20643 EVT ElemVT = VT.getVectorElementType();
20644 if (Alignment >= ElemVT.getStoreSize()) {
20645 if (Fast)
20646 *Fast = 1;
20647 return true;
20648 }
20649
20650 // Note: We lower an unmasked unaligned vector access to an equally sized
20651 // e8 element type access. Given this, we effectively support all unmasked
20652 // misaligned accesses. TODO: Work through the codegen implications of
20653 // allowing such accesses to be formed, and considered fast.
20654 if (Fast)
20655 *Fast = Subtarget.enableUnalignedVectorMem();
20656 return Subtarget.enableUnalignedVectorMem();
20657}
20658
20659
20660EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20661 const AttributeList &FuncAttributes) const {
20662 if (!Subtarget.hasVInstructions())
20663 return MVT::Other;
20664
20665 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20666 return MVT::Other;
20667
20668 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20669 // has an expansion threshold, and we want the number of hardware memory
20670 // operations to correspond roughly to that threshold. LMUL>1 operations
20671 // are typically expanded linearly internally, and thus correspond to more
20672 // than one actual memory operation. Note that store merging and load
20673 // combining will typically form larger LMUL operations from the LMUL1
20674 // operations emitted here, and that's okay because combining isn't
20675 // introducing new memory operations; it's just merging existing ones.
20676 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20677 if (Op.size() < MinVLenInBytes)
20678 // TODO: Figure out short memops. For the moment, do the default thing
20679 // which ends up using scalar sequences.
20680 return MVT::Other;
20681
20682 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20683 // a large scalar constant and instead use vmv.v.x/i to do the
20684 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20685 // maximize the chance we can encode the size in the vsetvli.
20686 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20687 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20688
20689 // Do we have sufficient alignment for our preferred VT? If not, revert
20690 // to largest size allowed by our alignment criteria.
20691 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20692 Align RequiredAlign(PreferredVT.getStoreSize());
20693 if (Op.isFixedDstAlign())
20694 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20695 if (Op.isMemcpy())
20696 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20697 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20698 }
20699 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20700}
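// A standalone sketch (not LLVM code; numbers and names are illustrative) of
// the element-type preference above: a non-zero memset prefers i8 so the byte
// can be broadcast with vmv.v.x, everything else prefers an ELEN-wide element,
// clamped by the known alignment when unaligned vector accesses are disabled.
#include <algorithm>
#include <cassert>

unsigned preferredElemBits(bool IsMemset, bool IsZeroMemset, unsigned ELen,
                           unsigned AlignBytes, bool UnalignedVecOK) {
  unsigned Bits = (IsMemset && !IsZeroMemset) ? 8 : ELen;
  if (Bits != 8 && !UnalignedVecOK)
    Bits = std::min(Bits, AlignBytes * 8);
  return Bits;
}

int main() {
  // memset(p, 0x7f, n): broadcast bytes regardless of alignment.
  assert(preferredElemBits(true, false, 64, 1, false) == 8);
  // memcpy of 8-byte aligned buffers with ELEN=64: use i64 elements.
  assert(preferredElemBits(false, false, 64, 8, false) == 64);
  // Only 4-byte alignment and no unaligned vector support: fall back to i32.
  assert(preferredElemBits(false, false, 64, 4, false) == 32);
  return 0;
}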
20701
20702bool RISCVTargetLowering::splitValueIntoRegisterParts(
20703 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20704 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20705 bool IsABIRegCopy = CC.has_value();
20706 EVT ValueVT = Val.getValueType();
20707 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20708 PartVT == MVT::f32) {
20709 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20710 // nan, and cast to f32.
20711 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20712 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20713 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20714 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20715 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20716 Parts[0] = Val;
20717 return true;
20718 }
20719
20720 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20721 LLVMContext &Context = *DAG.getContext();
20722 EVT ValueEltVT = ValueVT.getVectorElementType();
20723 EVT PartEltVT = PartVT.getVectorElementType();
20724 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20725 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20726 if (PartVTBitSize % ValueVTBitSize == 0) {
20727 assert(PartVTBitSize >= ValueVTBitSize);
20728 // If the element types are different, bitcast to the same element type of
20729 // PartVT first.
20730 // For example, suppose we want to copy a <vscale x 1 x i8> value to
20731 // <vscale x 4 x i16>.
20732 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
20733 // subvector, then we can bitcast to <vscale x 4 x i16>.
20734 if (ValueEltVT != PartEltVT) {
20735 if (PartVTBitSize > ValueVTBitSize) {
20736 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20737 assert(Count != 0 && "The number of element should not be zero.");
20738 EVT SameEltTypeVT =
20739 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20740 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20741 DAG.getUNDEF(SameEltTypeVT), Val,
20742 DAG.getVectorIdxConstant(0, DL));
20743 }
20744 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20745 } else {
20746 Val =
20747 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20748 Val, DAG.getVectorIdxConstant(0, DL));
20749 }
20750 Parts[0] = Val;
20751 return true;
20752 }
20753 }
20754 return false;
20755}
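// A standalone sketch (not LLVM code) of the f16/bf16 <-> f32 ABI copy above:
// the 16-bit payload is kept in the low half of the 32-bit FPR image and the
// high half is filled with ones ("NaN-boxing"); joinRegisterPartsIntoValue
// below simply truncates back to the low 16 bits.
#include <cassert>
#include <cstdint>

uint32_t nanBoxHalf(uint16_t HalfBits) { return 0xFFFF0000u | HalfBits; }

uint16_t unboxHalf(uint32_t FloatBits) { return uint16_t(FloatBits & 0xFFFFu); }

int main() {
  uint16_t H = 0x3C00;          // 1.0 in IEEE half precision
  uint32_t Boxed = nanBoxHalf(H);
  assert(Boxed == 0xFFFF3C00u); // exponent all ones -> an f32 NaN
  assert(unboxHalf(Boxed) == H); // round-trips through the f32 register
  return 0;
}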
20756
20757SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20758 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20759 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20760 bool IsABIRegCopy = CC.has_value();
20761 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20762 PartVT == MVT::f32) {
20763 SDValue Val = Parts[0];
20764
20765 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20766 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20767 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20768 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20769 return Val;
20770 }
20771
20772 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20773 LLVMContext &Context = *DAG.getContext();
20774 SDValue Val = Parts[0];
20775 EVT ValueEltVT = ValueVT.getVectorElementType();
20776 EVT PartEltVT = PartVT.getVectorElementType();
20777 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20778 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20779 if (PartVTBitSize % ValueVTBitSize == 0) {
20780 assert(PartVTBitSize >= ValueVTBitSize);
20781 EVT SameEltTypeVT = ValueVT;
20782 // If the element types are different, convert it to the same element type
20783 // of PartVT.
20784 // For example, suppose we want to copy a <vscale x 1 x i8> value out of
20785 // <vscale x 4 x i16>.
20786 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
20787 // then we can extract <vscale x 1 x i8>.
20788 if (ValueEltVT != PartEltVT) {
20789 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20790 assert(Count != 0 && "The number of element should not be zero.");
20791 SameEltTypeVT =
20792 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20793 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20794 }
20795 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20796 DAG.getVectorIdxConstant(0, DL));
20797 return Val;
20798 }
20799 }
20800 return SDValue();
20801}
20802
20803bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20804 // When aggressively optimizing for code size, we prefer to use a div
20805 // instruction, as it is usually smaller than the alternative sequence.
20806 // TODO: Add vector division?
20807 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20808 return OptSize && !VT.isVector();
20809}
20810
20811bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20812 // Scalarizing zero_ext and sign_ext can prevent a later match to a widening
20813 // instruction in some situations.
20814 unsigned Opc = N->getOpcode();
20815 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20816 return false;
20817 return true;
20818}
20819
20820static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20821 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20822 Function *ThreadPointerFunc =
20823 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20824 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20825 IRB.CreateCall(ThreadPointerFunc), Offset);
20826}
20827
20828Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20829 // Fuchsia provides a fixed TLS slot for the stack cookie.
20830 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20831 if (Subtarget.isTargetFuchsia())
20832 return useTpOffset(IRB, -0x10);
20833
20834 return TargetLowering::getIRStackGuard(IRB);
20835}
20836
20837bool RISCVTargetLowering::isLegalInterleavedAccessType(
20838 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20839 const DataLayout &DL) const {
20840 EVT VT = getValueType(DL, VTy);
20841 // Don't lower vlseg/vsseg for vector types that can't be split.
20842 if (!isTypeLegal(VT))
20843 return false;
20844
20845 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
20846 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20847 Alignment))
20848 return false;
20849
20850 MVT ContainerVT = VT.getSimpleVT();
20851
20852 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20853 if (!Subtarget.useRVVForFixedLengthVectors())
20854 return false;
20855 // Sometimes the interleaved access pass picks up splats as interleaves of
20856 // one element. Don't lower these.
20857 if (FVTy->getNumElements() < 2)
20858 return false;
20859
20860 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20861 }
20862
20863 // Need to make sure that EMUL * NFIELDS ≤ 8
20864 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20865 if (Fractional)
20866 return true;
20867 return Factor * LMUL <= 8;
20868}
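// A standalone sketch (not LLVM code) of the EMUL * NFIELDS <= 8 constraint
// checked above for vlsegN/vssegN: the whole segment group must fit in the
// 8-register limit of a vector register group.
#include <cassert>

bool segmentFits(unsigned EMul, unsigned Factor) { return EMul * Factor <= 8; }

int main() {
  // With VLEN = 128, one <8 x i32> field is 256 bits, i.e. EMUL = 2:
  assert(segmentFits(/*EMul=*/2, /*Factor=*/4));  // vlseg4e32 at m2 is fine
  assert(!segmentFits(/*EMul=*/2, /*Factor=*/8)); // 16 registers > 8
  // Fractional EMUL (< 1) always fits, hence the early "return true" above.
  return 0;
}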
20869
20870bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20871 Align Alignment) const {
20872 if (!Subtarget.hasVInstructions())
20873 return false;
20874
20875 // Only support fixed vectors if we know the minimum vector size.
20876 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20877 return false;
20878
20879 EVT ScalarType = DataType.getScalarType();
20880 if (!isLegalElementTypeForRVV(ScalarType))
20881 return false;
20882
20883 if (!Subtarget.enableUnalignedVectorMem() &&
20884 Alignment < ScalarType.getStoreSize())
20885 return false;
20886
20887 return true;
20888}
20889
20890static const Intrinsic::ID FixedVlsegIntrIds[] = {
20891 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20892 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20893 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20894 Intrinsic::riscv_seg8_load};
20895
20896/// Lower an interleaved load into a vlsegN intrinsic.
20897///
20898/// E.g. Lower an interleaved load (Factor = 2):
20899/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20900/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20901/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20902///
20903/// Into:
20904/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20905/// %ptr, i64 4)
20906/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20907/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20908bool RISCVTargetLowering::lowerInterleavedLoad(
20909 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20910 ArrayRef<unsigned> Indices, unsigned Factor) const {
20911 IRBuilder<> Builder(LI);
20912
20913 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20914 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20915 LI->getPointerAddressSpace(),
20916 LI->getModule()->getDataLayout()))
20917 return false;
20918
20919 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20920
20921 Function *VlsegNFunc =
20922 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20923 {VTy, LI->getPointerOperandType(), XLenTy});
20924
20925 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20926
20927 CallInst *VlsegN =
20928 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20929
20930 for (unsigned i = 0; i < Shuffles.size(); i++) {
20931 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20932 Shuffles[i]->replaceAllUsesWith(SubVec);
20933 }
20934
20935 return true;
20936}
20937
20938static const Intrinsic::ID FixedVssegIntrIds[] = {
20939 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20940 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20941 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20942 Intrinsic::riscv_seg8_store};
20943
20944/// Lower an interleaved store into a vssegN intrinsic.
20945///
20946/// E.g. Lower an interleaved store (Factor = 3):
20947/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20948/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20949/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20950///
20951/// Into:
20952/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20953/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20954/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20955/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20956/// %ptr, i32 4)
20957///
20958/// Note that the new shufflevectors will be removed and we'll only generate one
20959/// vsseg3 instruction in CodeGen.
20960bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20961 ShuffleVectorInst *SVI,
20962 unsigned Factor) const {
20963 IRBuilder<> Builder(SI);
20964 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
20965 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20966 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
20967 ShuffleVTy->getNumElements() / Factor);
20968 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
20969 SI->getPointerAddressSpace(),
20970 SI->getModule()->getDataLayout()))
20971 return false;
20972
20973 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20974
20975 Function *VssegNFunc =
20976 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
20977 {VTy, SI->getPointerOperandType(), XLenTy});
20978
20979 auto Mask = SVI->getShuffleMask();
20980 SmallVector<Value *, 10> Ops;
20981
20982 for (unsigned i = 0; i < Factor; i++) {
20983 Value *Shuffle = Builder.CreateShuffleVector(
20984 SVI->getOperand(0), SVI->getOperand(1),
20985 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
20986 Ops.push_back(Shuffle);
20987 }
20988 // This VL should be OK (should be executable in one vsseg instruction,
20989 // potentially under larger LMULs) because we checked that the fixed vector
20990 // type fits in isLegalInterleavedAccessType
20991 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20992 Ops.append({SI->getPointerOperand(), VL});
20993
20994 Builder.CreateCall(VssegNFunc, Ops);
20995
20996 return true;
20997}
20998
20999bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21000 LoadInst *LI) const {
21001 assert(LI->isSimple());
21002 IRBuilder<> Builder(LI);
21003
21004 // Only deinterleave2 supported at present.
21005 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
21006 return false;
21007
21008 unsigned Factor = 2;
21009
21010 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21011 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21012
21013 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21014 LI->getPointerAddressSpace(),
21015 LI->getModule()->getDataLayout()))
21016 return false;
21017
21018 Function *VlsegNFunc;
21019 Value *VL;
21020 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21021 SmallVector<Value *, 10> Ops;
21022
21023 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21024 VlsegNFunc = Intrinsic::getDeclaration(
21025 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21026 {ResVTy, LI->getPointerOperandType(), XLenTy});
21027 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21028 } else {
21029 static const Intrinsic::ID IntrIds[] = {
21030 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21031 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21032 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21033 Intrinsic::riscv_vlseg8};
21034
21035 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21036 {ResVTy, XLenTy});
21037 VL = Constant::getAllOnesValue(XLenTy);
21038 Ops.append(Factor, PoisonValue::get(ResVTy));
21039 }
21040
21041 Ops.append({LI->getPointerOperand(), VL});
21042
21043 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21044 DI->replaceAllUsesWith(Vlseg);
21045
21046 return true;
21047}
21048
21049bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21050 StoreInst *SI) const {
21051 assert(SI->isSimple());
21052 IRBuilder<> Builder(SI);
21053
21054 // Only interleave2 supported at present.
21055 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
21056 return false;
21057
21058 unsigned Factor = 2;
21059
21060 VectorType *VTy = cast<VectorType>(II->getType());
21061 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21062
21063 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21064 SI->getPointerAddressSpace(),
21065 SI->getModule()->getDataLayout()))
21066 return false;
21067
21068 Function *VssegNFunc;
21069 Value *VL;
21070 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21071
21072 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21073 VssegNFunc = Intrinsic::getDeclaration(
21074 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21075 {InVTy, SI->getPointerOperandType(), XLenTy});
21076 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21077 } else {
21078 static const Intrinsic::ID IntrIds[] = {
21079 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21080 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21081 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21082 Intrinsic::riscv_vsseg8};
21083
21084 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21085 {InVTy, XLenTy});
21086 VL = Constant::getAllOnesValue(XLenTy);
21087 }
21088
21089 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21090 SI->getPointerOperand(), VL});
21091
21092 return true;
21093}
21094
21095MachineInstr *
21096RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21097 MachineBasicBlock::instr_iterator &MBBI,
21098 const TargetInstrInfo *TII) const {
21099 assert(MBBI->isCall() && MBBI->getCFIType() &&
21100 "Invalid call instruction for a KCFI check");
21101 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21102 MBBI->getOpcode()));
21103
21104 MachineOperand &Target = MBBI->getOperand(0);
21105 Target.setIsRenamable(false);
21106
21107 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21108 .addReg(Target.getReg())
21109 .addImm(MBBI->getCFIType())
21110 .getInstr();
21111}
21112
21113#define GET_REGISTER_MATCHER
21114#include "RISCVGenAsmMatcher.inc"
21115
21116Register
21117RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21118 const MachineFunction &MF) const {
21119 Register Reg = MatchRegisterAltName(RegName);
21120 if (Reg == RISCV::NoRegister)
21121 Reg = MatchRegisterName(RegName);
21122 if (Reg == RISCV::NoRegister)
21123 report_fatal_error(
21124 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21125 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21126 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21127 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21128 StringRef(RegName) + "\"."));
21129 return Reg;
21130}
21131
21132MachineMemOperand::Flags
21133RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21134 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21135
21136 if (NontemporalInfo == nullptr)
21137 return MachineMemOperand::MONone;
21138
21139 // 1 (the default value) behaves as __RISCV_NTLH_ALL
21140 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21141 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21142 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21143 // 5 -> __RISCV_NTLH_ALL
21144 int NontemporalLevel = 5;
21145 const MDNode *RISCVNontemporalInfo =
21146 I.getMetadata("riscv-nontemporal-domain");
21147 if (RISCVNontemporalInfo != nullptr)
21148 NontemporalLevel =
21149 cast<ConstantInt>(
21150 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21151 ->getValue())
21152 ->getZExtValue();
21153
21154 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21155 "RISC-V target doesn't support this non-temporal domain.");
21156
21157 NontemporalLevel -= 2;
21158 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21159 if (NontemporalLevel & 0b1)
21160 Flags |= MONontemporalBit0;
21161 if (NontemporalLevel & 0b10)
21162 Flags |= MONontemporalBit1;
21163
21164 return Flags;
21165}
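// A standalone sketch (not LLVM code) of the encoding above: the
// riscv-nontemporal-domain level (2..5, default 5 == __RISCV_NTLH_ALL) is
// rebased to 0..3 and its two low bits are carried in the target-specific
// MONontemporalBit0/MONontemporalBit1 memory-operand flags.
#include <cassert>

struct NTBits {
  bool Bit0, Bit1;
};

NTBits encodeNontemporalLevel(int Level) {
  int Rebased = Level - 2; // __RISCV_NTLH_INNERMOST_PRIVATE becomes 0
  return {(Rebased & 0b01) != 0, (Rebased & 0b10) != 0};
}

int main() {
  assert(!encodeNontemporalLevel(2).Bit0 && !encodeNontemporalLevel(2).Bit1);
  assert(!encodeNontemporalLevel(4).Bit0 && encodeNontemporalLevel(4).Bit1);
  assert(encodeNontemporalLevel(5).Bit0 && encodeNontemporalLevel(5).Bit1);
  return 0;
}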
21166
21167MachineMemOperand::Flags
21168RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21169
21170 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21171 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21172 TargetFlags |= (NodeFlags & MONontemporalBit0);
21173 TargetFlags |= (NodeFlags & MONontemporalBit1);
21174 return TargetFlags;
21175}
21176
21177bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21178 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21179 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21180}
21181
21182bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21183 if (VT.isScalableVector())
21184 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21185 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21186 return true;
21187 return Subtarget.hasStdExtZbb() &&
21188 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21189}
21190
21191unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21192 ISD::CondCode Cond) const {
21193 return isCtpopFast(VT) ? 0 : 1;
21194}
21195
21196bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21197
21198 // GISel support is in progress or complete for these opcodes.
21199 unsigned Op = Inst.getOpcode();
21200 if (Op == Instruction::Add || Op == Instruction::Sub ||
21201 Op == Instruction::And || Op == Instruction::Or ||
21202 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21203 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21204 return false;
21205
21206 if (Inst.getType()->isScalableTy())
21207 return true;
21208
21209 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21210 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21211 !isa<ReturnInst>(&Inst))
21212 return true;
21213
21214 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21215 if (AI->getAllocatedType()->isScalableTy())
21216 return true;
21217 }
21218
21219 return false;
21220}
21221
21222SDValue
21223RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21224 SelectionDAG &DAG,
21225 SmallVectorImpl<SDNode *> &Created) const {
21226 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21227 if (isIntDivCheap(N->getValueType(0), Attr))
21228 return SDValue(N, 0); // Lower SDIV as SDIV
21229
21230 // Only perform this transform if short forward branch opt is supported.
21231 if (!Subtarget.hasShortForwardBranchOpt())
21232 return SDValue();
21233 EVT VT = N->getValueType(0);
21234 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21235 return SDValue();
21236
21237 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21238 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21239 return SDValue();
21240 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21241}
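// A standalone sketch (not LLVM code) of the pattern BuildSDIVPow2 emits when
// short forward branches are available: a signed divide by 2^k becomes a
// conditional add of 2^k - 1 (only when the dividend is negative, which is
// what the SFB-based CMov covers) followed by an arithmetic shift right. The
// range check above keeps 2^k - 1 within an addi-sized immediate.
#include <cassert>
#include <cstdint>

int64_t sdivPow2(int64_t X, unsigned K) {
  int64_t Bias = (int64_t(1) << K) - 1;
  int64_t Adjusted = X < 0 ? X + Bias : X; // the conditional-move step
  return Adjusted >> K;                    // arithmetic shift right
}

int main() {
  assert(sdivPow2(25, 3) == 25 / 8);
  assert(sdivPow2(-25, 3) == -25 / 8); // rounds toward zero, like sdiv
  assert(sdivPow2(-1, 3) == 0);
  return 0;
}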
21242
21243bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21244 EVT VT, const APInt &AndMask) const {
21245 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21246 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21247 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21248}
21249
21250unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21251 return Subtarget.getMinimumJumpTableEntries();
21252}
21253
21254// Handle single arg such as return value.
21255template <typename Arg>
21256void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21257 // This lambda determines whether an array of types are constructed by
21258 // homogeneous vector types.
21259 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21260 // First, extract the first element in the argument type.
21261 auto It = ArgList.begin();
21262 MVT FirstArgRegType = It->VT;
21263
21264 // Return if there is no return or the type needs split.
21265 if (It == ArgList.end() || It->Flags.isSplit())
21266 return false;
21267
21268 ++It;
21269
21270 // Return if this argument type contains only 1 element, or it's not a
21271 // vector type.
21272 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21273 return false;
21274
21275 // Second, check if the following elements in this argument type are all the
21276 // same.
21277 for (; It != ArgList.end(); ++It)
21278 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21279 return false;
21280
21281 return true;
21282 };
21283
21284 if (isHomogeneousScalableVectorType(ArgList)) {
21285 // Handle as tuple type
21286 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21287 } else {
21288 // Handle as normal vector type
21289 bool FirstVMaskAssigned = false;
21290 for (const auto &OutArg : ArgList) {
21291 MVT RegisterVT = OutArg.VT;
21292
21293 // Skip non-RVV register type
21294 if (!RegisterVT.isVector())
21295 continue;
21296
21297 if (RegisterVT.isFixedLengthVector())
21298 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21299
21300 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21301 RVVArgInfos.push_back({1, RegisterVT, true});
21302 FirstVMaskAssigned = true;
21303 continue;
21304 }
21305
21306 RVVArgInfos.push_back({1, RegisterVT, false});
21307 }
21308 }
21309}
21310
21311// Handle multiple args.
21312template <>
21313void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21314 const DataLayout &DL = MF->getDataLayout();
21315 const Function &F = MF->getFunction();
21316 LLVMContext &Context = F.getContext();
21317
21318 bool FirstVMaskAssigned = false;
21319 for (Type *Ty : TypeList) {
21320 StructType *STy = dyn_cast<StructType>(Ty);
21321 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21322 Type *ElemTy = STy->getTypeAtIndex(0U);
21323 EVT VT = TLI->getValueType(DL, ElemTy);
21324 MVT RegisterVT =
21325 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21326 unsigned NumRegs =
21327 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21328
21329 RVVArgInfos.push_back(
21330 {NumRegs * STy->getNumElements(), RegisterVT, false});
21331 } else {
21332 SmallVector<EVT, 4> ValueVTs;
21333 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21334
21335 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21336 ++Value) {
21337 EVT VT = ValueVTs[Value];
21338 MVT RegisterVT =
21339 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21340 unsigned NumRegs =
21341 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21342
21343 // Skip non-RVV register type
21344 if (!RegisterVT.isVector())
21345 continue;
21346
21347 if (RegisterVT.isFixedLengthVector())
21348 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21349
21350 if (!FirstVMaskAssigned &&
21351 RegisterVT.getVectorElementType() == MVT::i1) {
21352 RVVArgInfos.push_back({1, RegisterVT, true});
21353 FirstVMaskAssigned = true;
21354 --NumRegs;
21355 }
21356
21357 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21358 }
21359 }
21360 }
21361}
21362
21363void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21364 unsigned StartReg) {
21365 assert((StartReg % LMul) == 0 &&
21366 "Start register number should be multiple of lmul");
21367 const MCPhysReg *VRArrays;
21368 switch (LMul) {
21369 default:
21370 report_fatal_error("Invalid lmul");
21371 case 1:
21372 VRArrays = ArgVRs;
21373 break;
21374 case 2:
21375 VRArrays = ArgVRM2s;
21376 break;
21377 case 4:
21378 VRArrays = ArgVRM4s;
21379 break;
21380 case 8:
21381 VRArrays = ArgVRM8s;
21382 break;
21383 }
21384
21385 for (unsigned i = 0; i < NF; ++i)
21386 if (StartReg)
21387 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21388 else
21389 AllocatedPhysRegs.push_back(MCPhysReg());
21390}
21391
21392/// This function determines if each RVV argument is passed by register, if the
21393/// argument can be assigned to a VR, then give it a specific register.
21394/// Otherwise, assign the argument to 0 which is a invalid MCPhysReg.
21395void RVVArgDispatcher::compute() {
21396 uint32_t AssignedMap = 0;
21397 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21398 // Allocate first vector mask argument to V0.
21399 if (ArgInfo.FirstVMask) {
21400 AllocatedPhysRegs.push_back(RISCV::V0);
21401 return;
21402 }
21403
21404 unsigned RegsNeeded = divideCeil(
21405 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21406 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21407 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21408 StartReg += RegsNeeded) {
21409 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21410 if ((AssignedMap & Map) == 0) {
21411 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21412 AssignedMap |= Map;
21413 return;
21414 }
21415 }
21416
21417 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21418 };
21419
21420 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21421 allocate(RVVArgInfos[i]);
21422}
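// A standalone sketch (not LLVM code) of the first-fit search in compute()
// above, assuming 16 argument vector registers (v8..v23 per the psABI): each
// register is one bit in a mask and an argument needing NF * EMUL consecutive
// registers takes the first suitably aligned free run, or no register at all
// (0 below) when nothing fits.
#include <cassert>
#include <cstdint>

unsigned allocateVRs(uint32_t &AssignedMap, unsigned RegsNeeded,
                     unsigned TotalRegsNeeded, unsigned NumArgVRs = 16) {
  for (unsigned Start = 0; Start + TotalRegsNeeded <= NumArgVRs;
       Start += RegsNeeded) {
    uint32_t Run = ((1u << TotalRegsNeeded) - 1) << Start;
    if ((AssignedMap & Run) == 0) {
      AssignedMap |= Run;
      return Start + 8; // v8 is the first argument register
    }
  }
  return 0; // no contiguous group is free; the argument is passed indirectly
}

int main() {
  uint32_t Map = 0;
  assert(allocateVRs(Map, /*RegsNeeded=*/2, /*TotalRegsNeeded=*/2) == 8);  // v8m2
  assert(allocateVRs(Map, /*RegsNeeded=*/2, /*TotalRegsNeeded=*/4) == 10); // v10..v13
  assert(allocateVRs(Map, /*RegsNeeded=*/8, /*TotalRegsNeeded=*/8) == 16); // v16m8
  return 0;
}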
21423
21424MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21425 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21426 return AllocatedPhysRegs[CurIdx++];
21427}
21428
21429namespace llvm::RISCVVIntrinsicsTable {
21430
21431#define GET_RISCVVIntrinsicsTable_IMPL
21432#include "RISCVGenSearchableTables.inc"
21433
21434} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static constexpr int Concat[]
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
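The APInt entries above are the bit-manipulation helpers this lowering code leans on for immediate and mask analysis. A minimal, self-contained sketch (illustrative only, not code from this file) exercising a few of them:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Build a 12-bit low mask and its complement, then sanity-check them with the
// query helpers documented above.
static bool lowTwelveBitMaskExample() {
  APInt Low = APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/12);  // 0x00000FFF
  APInt High = APInt::getBitsSetFrom(/*numBits=*/32, /*loBit=*/12);    // 0xFFFFF000
  return Low.isSubsetOf(~High) &&      // the two masks do not overlap
         Low.getActiveBits() == 12 &&  // highest set bit is bit 11
         Low.countr_zero() == 0 &&     // bit 0 is set
         High.countr_zero() == 12;     // low 12 bits of High are clear
}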
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
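ArrayRef shows up throughout the shuffle-mask and calling-convention helpers above. A small illustrative sketch of slice(), assuming nothing beyond the header itself:

#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

// Sum every element except the first; slice(N, M) drops the first N elements
// and keeps the next M.
static int sumAfterFirst(ArrayRef<int> Vals) {
  if (Vals.size() < 2)
    return 0;
  int Sum = 0;
  for (int V : Vals.slice(1, Vals.size() - 1))
    Sum += V;
  return Sum;
}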
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
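Hooks such as shouldExpandAtomicRMWInIR() inspect atomicrmw instructions through the accessors above. A hedged sketch of that kind of query (RMW is assumed non-null; this is not the actual body of any hook in this file):

#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

// True for a plain monotonic integer "add" atomicrmw.
static bool isMonotonicIntegerAdd(const AtomicRMWInst *RMW) {
  return !RMW->isFloatingPointOperation() &&
         RMW->getOperation() == AtomicRMWInst::Add &&
         RMW->getOrdering() == AtomicOrdering::Monotonic;
}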
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
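CCState and CCValAssign drive LowerFormalArguments/LowerCall. The sketch below (illustrative only) walks an already-analyzed assignment list the way that code does; ArgLocs is assumed to have been filled in beforehand by AnalyzeFormalArguments() or AnalyzeCallOperands():

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Count how many arguments were assigned to registers rather than memory.
static unsigned countRegisterArgs(ArrayRef<CCValAssign> ArgLocs) {
  unsigned NumInRegs = 0;
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc())
      ++NumInRegs;                   // the value lives in VA.getLocReg()
    else if (VA.isMemLoc())
      (void)VA.getLocMemOffset();    // the value lives at this stack offset
  }
  return NumInRegs;
}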
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:299
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
iterator_range< arg_iterator > args()
Definition: Function.h:838
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:263
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
Argument * getArg(unsigned i) const
Definition: Function.h:832
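Lowering code frequently consults function attributes like the ones listed above. A hedged sketch; the "target-features" string attribute is used purely as an example:

#include "llvm/IR/Function.h"
using namespace llvm;

// Returns true if F carries a non-empty "target-features" string attribute.
static bool hasNonEmptyTargetFeatures(const Function &F) {
  if (!F.hasFnAttribute("target-features"))
    return false;
  return !F.getFnAttribute("target-features").getValueAsString().empty();
}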
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
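The IRBuilder calls above are what hooks like emitLeadingFence() and emitMaskedAtomicRMWIntrinsic() build their IR with. A hedged sketch, assuming BB is a valid basic block and V is an integer value wider than 32 bits (e.g. i64):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Append a release fence, then ~V truncated to i32, at the end of BB.
static Value *emitFenceNotTrunc(BasicBlock *BB, Value *V) {
  IRBuilder<> Builder(BB);                        // insert at the end of BB
  Builder.CreateFence(AtomicOrdering::Release);
  Value *Inverted = Builder.CreateNot(V, "inv");
  return Builder.CreateTrunc(Inverted, Builder.getInt32Ty(), "inv.lo");
}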
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
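MVT is the currency of every lowering and combine helper in this file. A hedged, self-contained sketch of building and querying an RVV-style scalable vector type (llvm/CodeGen/ValueTypes.h pulls in the MVT definitions):

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Build <vscale x 4 x i32> (nxv4i32) and check a few of its properties.
static bool scalableVectorTypeExample() {
  MVT VT = MVT::getScalableVectorVT(MVT::i32, 4);
  MVT Half = VT.getHalfNumVectorElementsVT();     // nxv2i32
  return VT.isScalableVector() &&
         VT.getVectorElementType() == MVT::i32 &&
         VT.getVectorMinNumElements() == 4 &&
         Half.getVectorMinNumElements() == 2 &&
         VT.changeVectorElementType(MVT::f32).isFloatingPoint();
}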
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
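Argument lowering uses MachineFrameInfo to materialize stack slots. A hedged sketch (MF is assumed to be a valid MachineFunction; the sizes and offset are arbitrary, not values taken from this file):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Create one spill slot and one ABI-mandated fixed object.
static void createStackObjectsExample(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  int SpillFI = MFI.CreateStackObject(/*Size=*/8, Align(8), /*isSpillSlot=*/true);
  int FixedFI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/0, /*IsImmutable=*/true);
  (void)SpillFI;
  (void)FixedFI;
}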
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
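Custom inserters such as emitReadCounterWidePseudo() are built out of the MachineInstrBuilder calls above. A hedged sketch of that pattern; everything passed in (block, iterator, DebugLoc, TII, opcode, register class) is assumed valid, and the opcode is assumed to take one register plus one immediate:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Emit `Opcode Dst, Src, 16` before InsertPt and return the new vreg.
static Register emitRegImmOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator InsertPt,
                             const DebugLoc &DL, const TargetInstrInfo *TII,
                             unsigned Opcode, Register SrcReg,
                             const TargetRegisterClass *RC) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  Register Dst = MRI.createVirtualRegister(RC);
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode), Dst)
      .addReg(SrcReg)
      .addImm(16);
  return Dst;
}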
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
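Nearly every helper in this file gates on subtarget queries like the ones above. A hedged sketch of such a guard (RISCVSubtarget.h is the target-private header this file already includes; the thresholds are illustrative, not taken from this file):

#include "RISCVSubtarget.h"
using namespace llvm;

// True when fixed-length vectors can be lowered via RVV with at least
// VLEN >= 128 and 64-bit element support.
static bool canUseWideRVVExample(const RISCVSubtarget &Subtarget) {
  if (!Subtarget.hasVInstructions() || !Subtarget.useRVVForFixedLengthVectors())
    return false;
  return Subtarget.getRealMinVLen() >= 128 && Subtarget.getELen() == 64;
}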
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y --> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
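DAG combines in this file reach their own hooks through the target lowering object. A hedged sketch (assumes a SelectionDAG &DAG and an immediate are in scope; the particular pair of hooks queried is arbitrary):

#include "RISCVISelLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// True when Imm is directly usable by both an add and a compare.
static bool immFitsAddAndCmpExample(SelectionDAG &DAG, int64_t Imm) {
  const auto &TLI =
      static_cast<const RISCVTargetLowering &>(DAG.getTargetLoweringInfo());
  return TLI.isLegalAddImmediate(Imm) && TLI.isLegalICmpImmediate(Imm);
}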
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node represents an undefined (UNDEF) value.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
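The SDValue/SDNode accessors above are the raw material of every combine in this file. A hedged sketch of a typical single-use-plus-constant check (Val is assumed to come from a type-legal DAG; the 12-bit bound mirrors a RISC-V simm12 but is only illustrative here):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True for a single-use ADD whose second operand is a 12-bit signed immediate.
static bool isSingleUseAddOfSimm12(SDValue Val) {
  if (Val.getOpcode() != ISD::ADD || !Val.hasOneUse())
    return false;
  auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(1));
  return C && C->getAPIntValue().isSignedIntN(12);
}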
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
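A minimal sketch of the split-then-recombine pattern using GetSplitDestVTs and SplitVector from above; the trailing CONCAT_VECTORS simply reassembles the halves, where a real lowering would first transform them.
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;
  static SDValue splitAndRejoin(SelectionDAG &DAG, const SDLoc &DL, SDValue Vec) {
    EVT VT = Vec.getValueType();
    auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL, LoVT, HiVT);
    // Transform Lo and Hi here before recombining in a real lowering.
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }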
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
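A minimal sketch of querying known bits: computeKnownBits returns the full KnownBits, while MaskedValueIsZero (listed below) answers a narrower yes/no question. The two-bit mask is an assumption of the example.
  #include "llvm/ADT/APInt.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;
  // Returns true if the two low bits of Op are provably zero.
  static bool lowTwoBitsAreZero(SelectionDAG &DAG, SDValue Op) {
    KnownBits Known = DAG.computeKnownBits(Op);
    if (Known.isUnknown())
      return false; // nothing is known about Op
    APInt LowMask = APInt::getLowBitsSet(Known.getBitWidth(), 2);
    return DAG.MaskedValueIsZero(Op, LowMask);
  }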
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
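A minimal sketch combining CreateStackTemporary, getStore, and getEntryNode from this list (plus the matching getLoad helper, not shown in this excerpt) to spill a value to a fresh stack slot and reload it; the alignment of 8 is an assumption of the example.
  #include "llvm/CodeGen/MachineMemOperand.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;
  static SDValue spillAndReload(SelectionDAG &DAG, const SDLoc &DL, SDValue Val) {
    EVT VT = Val.getValueType();
    SDValue Slot = DAG.CreateStackTemporary(VT.getStoreSize(), Align(8));
    SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, Val, Slot,
                                 MachinePointerInfo(), Align(8));
    return DAG.getLoad(VT, DL, Chain, Slot, MachinePointerInfo());
  }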
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
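A minimal sketch (the deduplication use case is hypothetical) exercising the SmallSet and SmallVector members documented above.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;
  // Keeps the first occurrence of each value, preserving order.
  static SmallVector<unsigned, 8> dedup(ArrayRef<unsigned> Values) {
    SmallSet<unsigned, 8> Seen;
    SmallVector<unsigned, 8> Order;
    for (unsigned V : Values)
      if (Seen.insert(V).second) // insert() reports whether V was new
        Order.push_back(V);
    return Order;
  }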
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
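A minimal sketch of the Case/Cases/Default chain; the strings and return values are illustrative only.
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"
  using namespace llvm;
  static unsigned classifyName(StringRef Name) {
    return StringSwitch<unsigned>(Name)
        .Case("vr", 1)
        .Cases("vm", "vd", 2)
        .Default(0);
  }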
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
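A minimal sketch of how a hypothetical TargetLowering subclass wires up these legalization hooks in its constructor; the ExampleTargetLowering class and the specific type/action pairs are illustrative, not the RISC-V configuration.
  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;
  namespace {
  class ExampleTargetLowering : public TargetLowering {
  public:
    explicit ExampleTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
      // i64 signed division is expanded (libcall or instruction sequence).
      setOperationAction(ISD::SDIV, MVT::i64, Expand);
      // Sign-extending i8 loads into i32 are not natively supported here.
      setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);
      // f64 -> f32 truncating stores must go through FP_ROUND + store.
      setTruncStoreAction(MVT::f64, MVT::f32, Expand);
      // Custom-lower i64 BSWAP in LowerOperation().
      setOperationAction(ISD::BSWAP, MVT::i64, Custom);
    }
  };
  } // end anonymous namespace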
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:243
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1132
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1128
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1345
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1376
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1161
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1277
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1278
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1361
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1365
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1234
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1239
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1375
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1273
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1274
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:885
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1194
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1358
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1227
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1362
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:994
@ STRICT_LROUND
Definition: ISDOpcodes.h:431
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1083
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1276
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1062
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:586
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1377
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1157
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1370
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1271
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1217
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1335
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1254
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1279
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1047
@ STRICT_LRINT
Definition: ISDOpcodes.h:433
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:591
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1378
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1269
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1270
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1188
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1214
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:636
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:434
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1268
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ STRICT_LLROUND
Definition: ISDOpcodes.h:432
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1366
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1152
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1076
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:580
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1491
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1491
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1478
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1412
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
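A minimal sketch (the classification scheme is hypothetical) using the ISD splat/build-vector predicates documented above.
  #include "llvm/ADT/APInt.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;
  // -1: all-ones splat, 0: all-zeros splat, 1: other constant splat, 2: none.
  static int classifySplat(SDValue V) {
    if (ISD::isConstantSplatVectorAllZeros(V.getNode()))
      return 0;
    if (ISD::isConstantSplatVectorAllOnes(V.getNode()))
      return -1;
    APInt SplatVal;
    if (ISD::isConstantSplatVector(V.getNode(), SplatVal))
      return 1;
    return 2;
  }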
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
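A minimal sketch of locating the mask and explicit-vector-length operands of a vector-predicated node with isVPOpcode, getVPMaskIdx, and getVPExplicitVectorLengthIdx from this list.
  #include <optional>
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;
  static void inspectVPNode(const SDNode *N) {
    unsigned Opc = N->getOpcode();
    if (!ISD::isVPOpcode(Opc))
      return;
    if (std::optional<unsigned> MaskIdx = ISD::getVPMaskIdx(Opc)) {
      SDValue Mask = N->getOperand(*MaskIdx);
      (void)Mask; // e.g. test for an all-ones mask here
    }
    if (std::optional<unsigned> EVLIdx = ISD::getVPExplicitVectorLengthIdx(Opc)) {
      SDValue EVL = N->getOperand(*EVLIdx);
      (void)EVL;
    }
  }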
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1574
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:560
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
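A minimal sketch (assuming these helpers live in the RISCVVType namespace declared in MCTargetDesc/RISCVBaseInfo.h, as they do in-tree) round-tripping LMUL and SEW encodings through the encode/decode pairs above.
  #include "MCTargetDesc/RISCVBaseInfo.h"
  static void vtypeRoundTrip(llvm::RISCVII::VLMUL VLMul, unsigned SEW) {
    auto [LMul, Fractional] = llvm::RISCVVType::decodeVLMUL(VLMul);
    llvm::RISCVII::VLMUL Reencoded = llvm::RISCVVType::encodeLMUL(LMul, Fractional);
    unsigned VSEW = llvm::RISCVVType::encodeSEW(SEW); // e.g. SEW=32 -> VSEW=2
    unsigned SEWBack = llvm::RISCVVType::decodeVSEW(VSEW);
    (void)Reencoded;
    (void)SEWBack;
  }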
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
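A few worked values for the MathExtras helpers listed above (divideCeil, isPowerOf2_64, Log2_64, PowerOf2Ceil, isMask_64), written as asserts.
  #include <cassert>
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;
  static void mathExtrasExamples() {
    assert(divideCeil(10, 4) == 3);                 // ceil(10 / 4)
    assert(isPowerOf2_64(64) && !isPowerOf2_64(0)); // zero is not a power of two
    assert(Log2_64(64) == 6);
    assert(PowerOf2Ceil(33) == 64);
    assert(isMask_64(0xFF));                        // a low-bit run of ones
  }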
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
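A minimal sketch of createSequentialMask (declared in llvm/Analysis/VectorUtils.h); undef lanes are encoded as -1 in shuffle masks.
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;
  // Produces the mask <2, 3, 4, 5, -1, -1>.
  static SmallVector<int, 16> exampleMask() {
    return createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
  }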
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
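A minimal sketch of combining and widening KnownBits facts with intersectWith and zext from the members above; WideBits is assumed to be at least the common bit width.
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;
  static KnownBits mergeAndWiden(const KnownBits &A, const KnownBits &B,
                                 unsigned WideBits) {
    KnownBits Common = A.intersectWith(B); // keep only facts true for both inputs
    return Common.zext(WideBits);          // the new high bits are known zero
  }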
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)